From 7ed589b4c32ee60e7848534b918465e0e73fb71c Mon Sep 17 00:00:00 2001 From: Hari Date: Thu, 29 Dec 2016 19:00:31 +1100 Subject: [PATCH 3/3] Base storage patch This adds the support of creating VCI index and necessary extra relations that are required for perfoming the columnar storage. --- contrib/Makefile | 3 +- contrib/vci/Makefile | 43 ++ contrib/vci/include/vci.h | 57 ++ contrib/vci/include/vci_columns.h | 153 +++++ contrib/vci/include/vci_freelist.h | 30 + contrib/vci/include/vci_ros.h | 505 ++++++++++++++++ contrib/vci/include/vci_tidcrid.h | 167 ++++++ contrib/vci/include/vci_xact.h | 19 + contrib/vci/readme.txt | 86 +++ contrib/vci/storage/Makefile | 30 + contrib/vci/storage/vci_columns.c | 290 +++++++++ contrib/vci/storage/vci_freelist.c | 42 ++ contrib/vci/storage/vci_index.c | 1098 +++++++++++++++++++++++++++++++++++ contrib/vci/storage/vci_ros.c | 624 ++++++++++++++++++++ contrib/vci/storage/vci_tidcrid.c | 173 ++++++ contrib/vci/storage/vci_xact.c | 39 ++ contrib/vci/vci--1.0.sql | 33 ++ contrib/vci/vci.control | 6 + contrib/vci/vci_main.c | 148 +++++ contrib/vci/vci_supported_funcs.sql | 64 ++ contrib/vci/vci_supported_types.c | 135 +++++ 21 files changed, 3744 insertions(+), 1 deletion(-) create mode 100644 contrib/vci/Makefile create mode 100644 contrib/vci/include/vci.h create mode 100644 contrib/vci/include/vci_columns.h create mode 100644 contrib/vci/include/vci_freelist.h create mode 100644 contrib/vci/include/vci_ros.h create mode 100644 contrib/vci/include/vci_tidcrid.h create mode 100644 contrib/vci/include/vci_xact.h create mode 100644 contrib/vci/readme.txt create mode 100644 contrib/vci/storage/Makefile create mode 100644 contrib/vci/storage/vci_columns.c create mode 100644 contrib/vci/storage/vci_freelist.c create mode 100644 contrib/vci/storage/vci_index.c create mode 100644 contrib/vci/storage/vci_ros.c create mode 100644 contrib/vci/storage/vci_tidcrid.c create mode 100644 contrib/vci/storage/vci_xact.c create mode 100644 
contrib/vci/vci--1.0.sql create mode 100644 contrib/vci/vci.control create mode 100644 contrib/vci/vci_main.c create mode 100644 contrib/vci/vci_supported_funcs.sql create mode 100644 contrib/vci/vci_supported_types.c diff --git a/contrib/Makefile b/contrib/Makefile index 25263c0..3b6ae6d 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -49,7 +49,8 @@ SUBDIRS = \ tsm_system_time \ tsearch2 \ unaccent \ - vacuumlo + vacuumlo \ + vci ifeq ($(with_openssl),yes) SUBDIRS += sslinfo diff --git a/contrib/vci/Makefile b/contrib/vci/Makefile new file mode 100644 index 0000000..25b959e --- /dev/null +++ b/contrib/vci/Makefile @@ -0,0 +1,43 @@ +# contrib/vci/Makefile + +MODULE_big = vci + +OBJS = \ + vci_main.o \ + vci_supported_types.o +SUBDIRS = \ + storage + +SUBDIROBJS = $(SUBDIRS:%=%/SUBSYS.o) + +OBJS += $(SUBDIROBJS) + +EXTENSION = vci +DATA = vci--1.0.sql + +PG_CPPFLAGS = -I $(top_srcdir)/contrib/vci/include + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/vci +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +$(SUBDIROBJS): $(SUBDIRS:%=%-recursive) ; + +.PHONY: $(SUBDIRS:%=%-recursive) + +$(SUBDIRS:%=%-recursive): + $(MAKE) -C $(subst -recursive,,$@) SUBSYS.o + +.PHONY: subclean +clean: subclean + +subclean: + for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean || exit; done + diff --git a/contrib/vci/include/vci.h b/contrib/vci/include/vci.h new file mode 100644 index 0000000..9fd8b9c --- /dev/null +++ b/contrib/vci/include/vci.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * vci.h - Primary include file for VCI *.c files + * + * This should be the first file included by VCI modules. 
+ * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci.h + *------------------------------------------------------------------------- + */ +#ifndef VCI_H +#define VCI_H + +#include "tcop/utility.h" +#include "utils/relcache.h" /* for Relation */ + +#define VCI_STRING "vci" +#define VCI_INTERNAL_RELATION_TEMPLATE "vci_%010d_%05d_%c" + +#ifdef WIN32 +#define strtok_r strtok_s +#endif + +typedef struct VciGucStruct +{ + bool enable; +} VciGucStruct; + +extern PGDLLEXPORT VciGucStruct VciGuc; + +/* for index_build */ +typedef enum +{ + vcirc_invalid = 0, + vcirc_reindex, + vcirc_truncate, + vcirc_vacuum_full, + vcirc_cluster, + vcirc_alter_table, + vcirc_num +} vci_RebuildCommand; + +extern vci_RebuildCommand vci_rebuild_command; + +extern void vci_add_index_delete(Relation heapRel, ItemPointer heap_tid, TransactionId xmin); +extern bool vci_add_drop_relation(Oid relOid, int flags); +extern bool vci_add_skip_vci_index(Relation indexRel); +extern void vci_process_utility(Node *parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + DestReceiver *destReceiver, char *completionTag); + +extern ProcessUtility_hook_type process_utility_prev; +extern bool vci_is_supported_type(Oid oid); +#endif /* VCI_H */ diff --git a/contrib/vci/include/vci_columns.h b/contrib/vci/include/vci_columns.h new file mode 100644 index 0000000..25dab43 --- /dev/null +++ b/contrib/vci/include/vci_columns.h @@ -0,0 +1,153 @@ +/*------------------------------------------------------------------------- + * + * vci_columns.h - Definitions and declarations of VCI column store and extents + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_columns.h + *------------------------------------------------------------------------- + */ + +#ifndef VCI_COLUMNS_H +#define VCI_COLUMNS_H + +#include "vci_tidcrid.h" + +/** 
header page ID of column meta data */ +#define VCI_COLUMN_META_HEADER_PAGE_ID (0) +/** First page of Column data relations */ +#define VCI_COLUMN_DATA_FIRST_PAGE_ID (0) +/** Column number of Column meta header page */ +#define VCI_NUM_COLUMN_META_HEADER_PAGE (1) +/** Column ID of first Normal Column */ +#define VCI_FIRST_NORMALCOLUMN_ID (0) +/** Column ID of special column */ +#define VCI_COLUMN_ID_TID (-1) +#define VCI_COLUMN_ID_NULL (-2) +#define VCI_COLUMN_ID_DELETE (-3) +#define VCI_COLUMN_ID_CRID (-4) + +/** The data below are not column-stored data. + * We prepare them for convenience. + */ +#define VCI_COLUMN_ID_TID_CRID (-5) +#define VCI_COLUMN_ID_TID_CRID_UPDATE (-6) +#define VCI_COLUMN_ID_TID_CRID_WRITE (-7) +#define VCI_COLUMN_ID_TID_CRID_CDR (-8) +#define VCI_COLUMN_ID_DATA_WOS (-9) +#define VCI_COLUMN_ID_WHITEOUT_WOS (-10) +#define VCI_INVALID_COLUMN_ID ((int16) -11) +/** Vector bit count in one item (tuple) for delete vector */ +#define VCI_NUM_ROWS_IN_ONE_ITEM_FOR_DELETE (1024) +/** Item number in page for delete vector */ +#define VCI_ITEMS_IN_PAGE_FOR_DELETE (52) +/** Page number in extent for delete vector */ +#define VCI_NUM_PAGES_IN_EXTENT_FOR_DELETE (5) + +/** maximum number of common dictionaries in a column + */ +#define VCI_NUM_COMMON_DICT (10) + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vcis_XXXXXXXXXXX_t, ** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. 
** + * ************************************************************** + */ + +/** + * common dictionary info of each column + */ +typedef struct vcis_c_common_dict +{ + BlockNumber block_number;/**< the position in the column data relation */ + BlockNumber num_blocks; /**< the length in DB page unit */ +} vcis_c_common_dict_t; + +typedef struct vcis_column_meta +{ + vcis_attribute_type_t vcis_attr_type;/**< Attribute type */ + + Oid pgsql_atttypid; /**< taken from FormData_pg_attribute.atttypid */ + int16 pgsql_attnum; /**< taken from FormData_pg_attribute.attnum */ + int16 pgsql_attlen; /**< taken from FormData_pg_attribute.attlen */ + int32 pgsql_atttypmod; /**< taken from FormData_pg_attribute.atttypmod */ + uint32 num_extents; /**< number of extents (for debug) */ + uint32 num_extents_old; /**< previous number of extents (for recovery) */ + + BlockNumber free_page_begin_id; /**< page ID of the first free area */ + BlockNumber free_page_begin_id_old; /**< previous free_page_begin_id (for recovery) */ + + BlockNumber free_page_end_id; /**< page ID of the last free area */ + BlockNumber free_page_end_id_old; /**< previous free_page_end_id (for recovery) */ + + /** + * The DB page ID of free area that located in front of the added or + * deleted extent by the ROS command. (for recovery) + * This is used to recover free area list. + */ + BlockNumber free_page_prev_id; + + /** + * Same as free_page_prev_id, but just behind the added or deleted extent. 
+ */ + BlockNumber free_page_next_id; + + /** + * The freespace size of added or deleted extent by the ROS command (for recovery) + */ + uint32 free_page_old_size; + + /** + * The freespace position of added or deleted extent in BlockNumber + * by the ROS command (for recovery) + */ + BlockNumber new_data_head; + BlockNumber new_freespace_head; + + BlockNumber num_free_pages; /**< number of free DB pages in the listed free area */ + BlockNumber num_free_pages_old; /**< for recovery */ + BlockNumber num_free_page_blocks;/**< number of free areas, not number of free DB pages */ + BlockNumber num_free_page_blocks_old;/**< for recovery */ + + + uint32 common_flag_0; /**< vcis_column_meta_flag */ + + uint32 min_max_field_size; /**< size of min_max field size */ + uint32 min_max_content_size; /**< size of min_max content size */ + uint16 num_common_dicts; /**< Number of common dictionarys */ + int16 latest_common_dict_id; /**< Id of the latest common dictionary */ + uint32 common_dict_info_offset; /**< offset of common_dict_info[0] */ + uint32 block_number_extent_offset; /**< offset of extent_pointer[0] */ + + vcis_c_common_dict_t common_dict_info[1];/**< common dictionary informations */ + /* block_number_extent follows common_dict_info[num_common_dict - 1] */ +} vcis_column_meta_t; + + +typedef vci_RelationPair vci_ColumnRelations; + +extern vcis_column_meta_t *vci_GetColumnMeta(Buffer *buffer, Relation rel); + + +extern void vci_OpenColumnRelations(vci_ColumnRelations *rel, + vci_MainRelHeaderInfo *info, + int16 columnId, + LOCKMODE lockmode); + +extern void vci_CloseColumnRelations(vci_ColumnRelations *rel, + LOCKMODE lockmode); + +extern void vci_InitializeColumnRelations(vci_MainRelHeaderInfo *info, + TupleDesc tupdesc, + Relation heapRel); + +static inline void +vci_WriteColumnMetaDataHeader(Relation relMeta, + Buffer buffer) +{ + vci_WriteOneItemPage(relMeta, buffer); +} + +#endif /* VCI_COLUMNS_H */ diff --git a/contrib/vci/include/vci_freelist.h 
b/contrib/vci/include/vci_freelist.h new file mode 100644 index 0000000..1e38bac --- /dev/null +++ b/contrib/vci/include/vci_freelist.h @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * vci_freelist.h - Definitions and declarations of Free space link list + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_freelist.h + *------------------------------------------------------------------------- + */ + +#ifndef VCI_FREELIST_H +#define VCI_FREELIST_H + +#include "vci_columns.h" + +#define VCI_FREESPACE_ITEM_ID FirstOffsetNumber + +typedef struct vcis_free_space +{ + uint32 size; + vcis_extent_type_t type; + BlockNumber prev_pos; + BlockNumber next_pos; +} vcis_free_space_t; + + +extern vcis_free_space_t *vci_GetFreeSpace(vci_RelationPair *relPair, BlockNumber blk); + +#endif /* VCI_FREELIST_H */ diff --git a/contrib/vci/include/vci_ros.h b/contrib/vci/include/vci_ros.h new file mode 100644 index 0000000..f2eeec9 --- /dev/null +++ b/contrib/vci/include/vci_ros.h @@ -0,0 +1,505 @@ +/*------------------------------------------------------------------------- + * + * vci_ros.h - Definitions and declarations of VCI main relation + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_ros.h + *------------------------------------------------------------------------- + */ + +#ifndef VCI_ROS_H +#define VCI_ROS_H + +#include "access/htup_details.h" +#include "nodes/execnodes.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +#define VCI_ROS_VERSION_MAJOR ((uint32) 0x00000000) +#define VCI_ROS_VERSION_MINOR ((uint32) 0x0000000D) + +#define SizeOfIptrData \ + (offsetof(ItemPointerData, ip_posid) + sizeof(OffsetNumber)) + +/* from src/backend/utils/adt/tid.c */ +#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X)) +#define 
ItemPointerGetDatum(X) PointerGetDatum(X) + +/** + * @brief IDs of ROS commands. + */ +typedef enum vci_ros_command +{ + vci_rc_invalid = -11, /**< Invalid case. */ + + /** For vacuum with vci_mrlm_read_write_exclusive. */ + vci_rc_vacuum = -10, + + /** For normal query with vci_mrlm_read_share. */ + vci_rc_query = -9, + + /** For DROP command with vci_mrlm_read_write_exclusive. */ + vci_rc_drop_index = -8, + + /** For DELETE or UPDATE commands with vci_mrlm_read_share. */ + vci_rc_wos_delete = -7, + + /** For INSERT or UPDATE commands with vci_mrlm_read_share. */ + vci_rc_wos_insert = -6, + + /** For recovering ROS with vci_mrlm_read_share, assumed that this command + * is used in vci_mrlm_write_exclusive lock of ROS commands. */ + vci_rc_recovery = -5, + + /** For collecting VCI information with vci_mrlm_read_share. + * This is also used by vci_KeepMainRelHeader() and + * vci_KeepMainRelHeaderWOVersionCheck() automatically. + * */ + vci_rc_probe = -4, + + /** For building ROS in initial index building with + * vci_mrlm_read_write_exclusive. */ + vci_rc_wos_ros_conv_build = -3, + + /** For building local ROS with vci_mrlm_read_write_exclusive, to serialize + * ROS commands. + */ + vci_rc_generate_local_ros = -2, + + /** For COPY command with vci_mrlm_write_share. 
*/ + vci_rc_copy_command = -1, + + /** For WOS -> ROS conversion with vci_mrlm_write_exclusive */ + vci_rc_wos_ros_conv = 0, + + /** For updating delete vector with vci_mrlm_write_exclusive */ + vci_rc_update_del_vec, + + /** For collecting deleted rows with vci_mrlm_write_exclusive */ + vci_rc_collect_deleted, + + /** For collecting deleted extents, unable to access anymore, + * with vci_mrlm_write_exclusive + */ + vci_rc_collect_extent, + + /** For updating TID -> CRID relations with vci_mrlm_write_exclusive */ + vci_rc_update_tid_crid, + + /** For compaction with vci_mrlm_write_exclusive */ + /* vci_rc_compaction, */ + + num_vci_rc, /**< anchor */ +} vci_ros_command_t; + +/** Value indicating invalid dictionary. The value is -1 */ +#define VCI_INVALID_DICTIONARY_ID (-1) + +/** Base alignment in storage. + * In the storage, normally VCI uses four-byte integers. + * Thus, we align the data in the storage by four bytes. + */ +#define VCI_DATA_ALIGNMENT_IN_STORAGE (4) + +/** Aligned values, rounded up */ +#define vci_RoundUpValue(value, unit) \ + ((((value) + (unit) - 1) / (unit)) * (unit)) +/** Aligned values, rounded down */ +#define vci_RoundDownValue(value, unit) \ + (((value) / (unit)) * (unit)) + +/** Get byte size of data in an item when a page contains multiple items. + * @param[in] numItem Number of items in a page. + * @return The size of data in an item in byte. 
+ */ +#define VCI_ITEM_SPACE(numItem) \ + ((((BLCKSZ - offsetof(PageHeaderData, pd_linp) \ + - (numItem * (sizeof(HeapTupleHeaderData) + sizeof(ItemIdData)))) \ + / numItem) / VCI_DATA_ALIGNMENT_IN_STORAGE) \ + * VCI_DATA_ALIGNMENT_IN_STORAGE) + +/** Minimum header space in DB page with one item, normally 52 byts */ +#define VCI_MIN_PAGE_HEADER \ + (SizeOfPageHeaderData + sizeof(HeapTupleHeaderData) \ + + sizeof(ItemIdData)) + +/** Available area in DB page with one item, normally 8140 bytes */ +#define VCI_MAX_PAGE_SPACE (BLCKSZ - VCI_MIN_PAGE_HEADER) + + +/** + * @brief Field names and addresses of VCI main relation. + * See * Table 5-1. + * These enum values has the page ID at upper 16 bits, and offset for the + * field at lower 16 bits. + * The offset is measured from the top of DB page, not after the page header. + * + * This is for struct vcis_main_t. + * Because the header ov VCI main relation has three pages, we can not map + * one structure of C on the header pages. + * + * Minimum header in DB page is 52 bytes (0x34) + * + * @note CAUTION: IF YOU HAVE MODIFIED IN vci_MainRelVar, + * INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR + * DEFINED AT THE TOP OF THIS HEADER FILE. + */ +typedef enum vci_MainRelVar +{ + /* page 0 */ + vcimrv_data_wos_oid = 0x00000034, + vcimrv_whiteout_wos_oid = 0x00000038, + vcimrv_tid_crid_meta_oid = 0x00000040, + vcimrv_tid_crid_data_oid = 0x00000044, + vcimrv_tid_crid_update_oid_0 = 0x00000048, + vcimrv_tid_crid_update_oid_1 = 0x0000004C, + vcimrv_delete_meta_oid = 0x00000054, + vcimrv_delete_data_oid = 0x00000058, + vcimrv_null_meta_oid = 0x0000005C, + vcimrv_null_data_oid = 0x00000060, + vcimrv_tid_meta_oid = 0x00000064, + vcimrv_tid_data_oid = 0x00000068, + vcimrv_ros_version_major = 0x0000006C,/** MUST BE 0x0000006C */ + vcimrv_ros_version_minor = 0x00000070,/** MUST BE 0x00000070 */ + vcimrv_num_nullable_columns = 0x00000074, + vcimrv_null_width_in_byte = 0x00000078,/** byte size of null bit vector for one row. 
*/ + vcimrv_column_info_offset = 0x0000007C, + vcimrv_num_columns = 0x00000080, + vcimrv_extent_info_offset = 0x00000084, + /* page 0 to 2 */ + vcimrv_column_info = 0x00000088, + /* page 3 */ + vcimrv_size_mr = 0x00030034, + vcimrv_size_mr_old = 0x00030038, + vcimrv_current_ros_version = 0x0003003C, + vcimrv_last_ros_version = 0x00030040, + vcimrv_tid_crid_diff_sel = 0x00030044, + vcimrv_tid_crid_diff_sel_old = 0x00030048, + vcimrv_xid_generation = 0x0003004C, + vcimrv_xid_gen_udpate_xid = 0x00030050, + vcimrv_ros_command = 0x00030060, + vcimrv_old_extent_id = 0x0003006C, + vcimrv_new_extent_id = 0x00030070, + vcimrv_working_column_id = 0x00030074, + vcimrv_working_dictionary_id = 0x00030078, + vcimrv_tid_crid_operation = 0x0003007C, + vcimrv_tid_crid_target_blocknumber = 0x00030080, + vcimrv_tid_crid_target_info = 0x00030084, + vcimrv_tid_crid_free_blocknumber = 0x00030088, + vcimrv_num_unterminated_copy_cmd = 0x0003008C, + vcimrv_tid_crid_tag_bitmap = 0x00030090, + vcimrv_num_extents = 0x000300A0, + vcimrv_num_extents_old = 0x000300A4, + vcimrv_extent_info = 0x000300A8, + + /* error code */ + vcimrv_invalid = 0xFFFFFFFF, +} vci_MainRelVar; + +/** mask data to get offset for fileds in VCI main relation header in DB page */ +#define VCI_MRV_MASK_OFFSET (0xFFFF) +/** bit to shift to get DB page ID for fileds in VCI main relation header */ +#define VCI_MRV_PAGE_SHIFT (16) + +/** + * @brief Get block number for given field of main relation header. + * @param[in] value value defined in vci_MainRelVar. + * @return Block number containing given field. + */ +#define vci_MRVGetBlockNumber(value) ((value) >> VCI_MRV_PAGE_SHIFT) + +/** + * @brief Get offset in DB page for given field of main relation header. + * @param[in] value value defined in vci_MainRelVar. + * @return Offset for containing given field from page top including header. 
+ */ +#define vci_MRVGetOffset(value) ((value) & VCI_MRV_MASK_OFFSET) + +/** Number of header pages of VCI main relation */ +#define VCI_NUM_MAIN_REL_HEADER_PAGES (4) + +/** Struct to keep pointers to the header pages of VCI main relation */ +typedef struct vci_MainRelHeaderInfo +{ + Relation rel;/**< Relation of VCI main relation */ + + /* VCI mainrelation header pages + * should be initialized with InvalidBuffer + */ + Buffer buffer[VCI_NUM_MAIN_REL_HEADER_PAGES]; /**< Buffers for the main relation header pages. */ + vci_ros_command_t command; /**< Command using this structure. */ + + /** number of extents that have the area to store their vcis_m_extent_t + * in main relation. + * This field is used in query execution, otherwise it has "-1". + */ + int32 num_extents_allocated; +} vci_MainRelHeaderInfo; + + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vci_m_column_t, ** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. ** + * ************************************************************** + */ +/** One entry of column_info in VCI main relation + * Table 5-2 + */ +typedef struct vcis_m_column +{ + Oid meta_oid; /** OID of metadata relation */ + Oid data_oid; /** OID of data relation */ + /* + int16 max_columns_size; + */ + /** AttrNumber original_attribute_number; */ + int16 max_columns_size; + int16 comp_type; /** vcis_compression_type_t */ +} vcis_m_column_t; + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vci_m_extent_t, ** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. ** + * ************************************************************** + */ +/** One entry of extent_info in VCI main relation + * Table 5-3 + */ +typedef struct vcis_m_extent +{ + /** number of rows recorded, including marked as deleted. 
*/ + uint32 num_rows; + uint32 num_deleted_rows; /**< number of rows marked as deleted. */ + uint32 num_deleted_rows_old; /**< num_deleted_rows for recovery */ + TransactionId xgen; /**< like xmin */ + TransactionId xdel; /**< like xmax */ + + uint16 flags; + uint16 recovered_colid; +} vcis_m_extent_t; + + +/** + * @brief + * VCI main relation header area to store by vci_WriteMainRelVar() + * vci_wmrv_all is used when the VCI relation is built, since first two or + * three pages are defined in building time, then not modified at all. + * The last page has ROS command, current ROS version, and extent information + * so will be updated after creation. vci_wmrv_update is used when the last + * page is updated. + */ +typedef enum vci_wmrv_t +{ + vci_wmrv_update,/** Only the last header page will be wrote to storage */ + vci_wmrv_all /** All the header pages will be wrote to storage */ +} vci_wmrv_t; + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vcis_attribute_type_t, ** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. ** + * ************************************************************** + */ +/** I categorized ROS data like TID, NULL bit vector, normal column data + * as shown below. + * That is extended from table 5-6. 
+ */ +/* table 5-6 */ +typedef enum vcis_attribute_type_t +{ + vcis_attribute_type_main = 0, /* data only */ + vcis_attribute_type_data_wos, /* data only */ + vcis_attribute_type_whiteout_wos, /* data only */ + vcis_attribute_type_tid_crid, /* special type, meta and data */ + vcis_attribute_type_tid_crid_update,/* data only */ /* two elements */ + vcis_attribute_type_delete_vec, /* normal column type */ + vcis_attribute_type_null_vec, /* normal column type */ + vcis_attribute_type_tid, /* normal column type */ + vcis_attribute_type_pgsql, /* normal column type */ + /* number of indexed columns */ + num_vcis_attribute_type +} vcis_attribute_type_t; + + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vcis_compression_type_t,** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. ** + * ************************************************************** + */ +/** table 5-7 */ +typedef enum vcis_compression_type_t +{ + vcis_compression_type_invalid = -1, + vcis_compression_type_fixed_raw = 0, + vcis_compression_type_variable_raw, + vcis_compression_type_fixed_comp, /**< reserved */ + vcis_compression_type_auto, /**< reserved */ + vcis_compression_type_lzvf, + vcis_compression_type_rle_fixed, + vcis_compression_type_rle_variable, + num_vcis_compression_type +} vcis_compression_type_t; + +/** Function tells if dictionary is necessary for the given compression type. + * @param[in] compression_type Compression type of the column, one of + * vcis_compression_type_t. + * @retval true Dictionary is necessary. + * @retval false The compression method does not need dictionary. 
+ */ +#define vci_UseDictionary(compression_type) \ + (vcis_compression_type_lzvf == (compression_type)) + + +/**************************************************************** + * ** CAUTION: IF YOU HAVE MODIFIED IN vcis_extent_type_t, ** + * ** INCREMENT VCI_ROS_VERSION_MAJOR OR VCI_ROS_VERSION_MINOR ** + * ** DEFINED AT THE TOP OF THIS HEADER FILE. ** + * ************************************************************** + */ +/** table 5-16 */ +typedef enum vcis_extent_type_t +{ + /** initial value is zero, since newly created DB page is filled with zero. + */ + vcis_undef_space = 0, + + vcis_extent_type_data, + vcis_extent_type_dict, + vcis_free_space, + + vcis_tidcrid_type_leaf, + vcis_tidcrid_type_trunk, + vcis_tidcrid_type_pagetag, + + num_vcis_extent_type +} vcis_extent_type_t, vcis_tidcrid_item_type_t; + +typedef struct vci_RelationPair +{ + vci_MainRelHeaderInfo *info; + + Relation meta; + Relation data; + + Buffer bufMeta; + Buffer bufData; +} vci_RelationPair; + +extern void vci_InitMainRelHeaderInfo(vci_MainRelHeaderInfo *info, + Relation rel, + vci_ros_command_t command); +extern void vci_KeepMainRelHeaderWithoutVersionCheck(vci_MainRelHeaderInfo *info); +extern void vci_KeepMainRelHeader(vci_MainRelHeaderInfo *info); + +extern void vci_ReleaseMainRelHeader(vci_MainRelHeaderInfo *info); + +extern void vci_SetMainRelVar(vci_MainRelHeaderInfo *info, + vci_MainRelVar var, + int elemId, + uint32 value); +extern uint32 vci_GetMainRelVar(vci_MainRelHeaderInfo *info, + vci_MainRelVar var, + int elemId); +extern void vci_WriteMainRelVar(vci_MainRelHeaderInfo *info, + vci_wmrv_t writeArea); + +extern void vci_InitPageCore(Buffer buffer, int16 numItem, bool locked); + +extern Buffer vci_ReadBufferWithPageInit(Relation reln, BlockNumber blockNumber); + +/* + * In order to keep the heap tuple plane, set 'p' to attstorage in + * FormData_pg_attribute. 
+ */ + +extern vci_MainRelVar vci_GetMColumnPosition(int16 columnId); +extern vcis_m_column_t *vci_GetMColumn(vci_MainRelHeaderInfo *info, int16 columnId); + +extern int16 vci_GetColumnWorstSize(Form_pg_attribute attr); + +/* + * ********************************************************* + * functions to recover ROS + * ********************************************************* + */ +extern void +vci_PreparePagesIfNecessaryCore(Relation rel, + BlockNumber blockNumber, + uint16 numItems, + bool forceInit, + bool logItems); + +/** + * @brief + * This function checks if the relation has the DB page with the page ID + * blockNumber. + * When it does not exists, the function extends the relation and initialize + * extended pages with one item per page. + * @param[in] rel The relation. + * @param[in] blockNumber The block number to be examined. + * @param[in] numItems The number of items the page is initialized with. + */ +static inline void +vci_FormatPageWithItems(Relation rel, BlockNumber blockNumber, int16 numItems) +{ + vci_PreparePagesIfNecessaryCore(rel, blockNumber, numItems, true, false); +} + +static inline void +vci_PreparePagesIfNecessary(Relation rel, BlockNumber blockNumber, uint16 numItems) +{ + vci_PreparePagesIfNecessaryCore(rel, blockNumber, numItems, false, false); +} + +extern void vci_WriteItem(Relation rel, + Buffer buffer, + OffsetNumber itemId); + +extern int vci_GetNumberOfNullableColumn(Relation rel); + +/* + * + */ +static inline void +vci_PreparePagesWithOneItemIfNecessary(Relation relation, + BlockNumber blockNumber) +{ + vci_PreparePagesIfNecessary(relation, blockNumber, 1); +} + +/* this function set the dirty bit, and write all the items in the page + * to the WAL. 
+ * argumtents + * Relation rel + * Buffer buffer + */ +static inline void +vci_WriteOneItemPage(Relation rel, + Buffer buffer) +{ + vci_WriteItem(rel, buffer, FirstOffsetNumber); +} + + +static inline void +vci_FormatPageWithOneItem(Relation rel, BlockNumber blockNumber) +{ + vci_FormatPageWithItems(rel, blockNumber, 1); +} + +/* ---------------- + * vci_index.c + * ---------------- + */ +extern bool vci_isVciIndexRelation(Relation rel); +extern bool vci_isVciAdditionalRelation(Relation rel); +extern bool vci_isVciAdditionalRelationTuple(Oid reloid, Form_pg_class reltuple); + +#endif /* VCI_ROS_H */ diff --git a/contrib/vci/include/vci_tidcrid.h b/contrib/vci/include/vci_tidcrid.h new file mode 100644 index 0000000..83785f4 --- /dev/null +++ b/contrib/vci/include/vci_tidcrid.h @@ -0,0 +1,167 @@ +/*------------------------------------------------------------------------- + * + * vci_tidcrid.h - Definitions and Declarations of TIDCRID update list and + * TIDCRID Tree relation + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_tidcrid.h + *------------------------------------------------------------------------- + */ + +#ifndef VCI_TIDCRID_H +#define VCI_TIDCRID_H + +#include "vci_ros.h" +#include "vci_freelist.h" + +/** header page ID of TID->CRID update (differential) list */ +#define VCI_TID_CRID_UPDATE_HEADER_PAGE_ID (0) +/** first body page ID of TID->CRID update (differential) list */ +#define VCI_TID_CRID_UPDATE_BODY_PAGE_ID (1) +/** First page of tidcrid tree meta relation */ +#define VCI_TID_CRID_META_FIRST_PAGE_ID (0) +/** First page of tidcrid tree data relation */ +#define VCI_TID_CRID_DATA_FIRST_PAGE_ID (0) +/** Item number in page for tidcrid tree relation */ +#define VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE (18) +/** Offset number of page tag */ +#define VCI_TID_CRID_PAGETAG_ITEM_ID (VCI_FREESPACE_ITEM_ID) +/** Capacity of tidcrid leaf node in bit*/ +#define 
VCI_TID_CRID_LEAF_CAPACITY_BITS (6) +/** Capacity of tidcrid leaf node in bit*/ +#define VCI_TID_CRID_LEAF_CAPACITY (1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) +/** Capacity of tidcrid trunk node in bit*/ +#define VCI_TID_CRID_TRUNK_CAPACITY_BITS (6) +/** Capacity of tidcrid trunk node in bit*/ +#define VCI_TID_CRID_TRUNK_CAPACITY (1 << VCI_TID_CRID_TRUNK_CAPACITY_BITS) +/** Index of trunk node */ +#define VCI_TID_CRID_TRUNKNODE (-1) +/** The number of items in DB page of TID-CRID Update List, normally 678 */ +#define VCI_TID_CRID_UPDATE_PAGE_ITEMS (VCI_MAX_PAGE_SPACE / sizeof(vcis_tidcrid_pair_item_t)) +/** Available area in DB page of TID-CRID Update List, normally 8136 bytes */ +#define VCI_TID_CRID_UPDATE_PAGE_SPACE (VCI_TID_CRID_UPDATE_PAGE_ITEMS * sizeof(vcis_tidcrid_pair_item_t)) + + +#define VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES (1353) + +typedef struct vcis_Crid +{ + uint16 v0; + uint16 v1; + uint16 v2; +} +#ifdef __arm__ +__attribute__((packed)) +#endif +vcis_Crid; + + +typedef struct vcis_tidcrid_meta_item +{ + BlockNumber block_number; + BlockNumber block_number_old; + int16 item_id; + int16 item_id_old; +} vcis_tidcrid_meta_item_t; + +typedef struct vcis_tidcrid_meta +{ + vcis_attribute_type_t vcis_attr_type;/**< Attribute type */ + + Oid pgsql_atttypid; /**< taken from FormData_pg_attribute.atttypid */ + int16 pgsql_attnum; /**< taken from FormData_pg_attribute.attnum */ + int16 pgsql_attlen; /**< taken from FormData_pg_attribute.attlen */ + int32 pgsql_atttypmod; /**< taken from FormData_pg_attribute.atttypmod */ + uint32 num_extents; /**< number of extents (for debug) */ + uint32 num_extents_old; /**< previous number of extents (for recovery) */ + + BlockNumber free_page_begin_id; /**< page ID of the first free area */ + BlockNumber free_page_begin_id_old; /**< previous free_page_begin_id (for recovery) */ + + BlockNumber free_page_end_id; /**< page ID of the last free area */ + BlockNumber free_page_end_id_old; /**< previous free_page_end_id (for 
recovery) */ + + /** + * The DB page ID of free area that located in front of the added or + * deleted extent by the ROS command. (for recovery) + * This is used to recover free area list. + */ + BlockNumber free_page_prev_id; + + /** + * Same as free_page_prev_id, but just behind the added or deleted extent. + */ + BlockNumber free_page_next_id; + + /** + * The freespace size of added or deleted extent by the ROS command (for recovery) + */ + uint32 free_page_old_size; + + /** + * The freespace position of added or deleted extent in BlockNumber + * by the ROS command (for recovery) + */ + BlockNumber new_data_head; + BlockNumber new_freespace_head; + + BlockNumber num_free_pages; /**< number of free DB pages in the listed free area */ + BlockNumber num_free_pages_old; /**< for recovery */ + BlockNumber num_free_page_blocks;/**< number of free areas, not number of free DB pages */ + BlockNumber num_free_page_blocks_old;/**< for recovery */ + + BlockNumber num; + BlockNumber num_old; + BlockNumber free_block_number; + int32 offset; + vcis_tidcrid_meta_item_t body[1]; +} vcis_tidcrid_meta_t; + +typedef struct vcis_tidcrid_pagetag +{ + uint32 size; + vcis_extent_type_t type; + BlockNumber prev_pos; + BlockNumber next_pos; + uint32 num; + uint32 free_size; + uint32 bitmap; + char rsv[4]; /** 8 byte alignment */ +} vcis_tidcrid_pagetag_t; + +typedef struct vcis_tidcrid_pair_item +{ + ItemPointerData page_item_id; + vcis_Crid crid; /**< CRID */ +} vcis_tidcrid_pair_item_t; + +typedef struct vcis_tidcrid_pair_list +{ + uint64 num; + + uint16 blocks_per_samp; + uint16 num_samples; + ItemPointerData sample_tids[VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES + 1]; + + /* + * + * offsetof(vcis_tidcrid_pair_list_t, body) == VCI_TID_CRID_UPDATE_PAGE_SPACE + */ + vcis_tidcrid_pair_item_t body[1]; +} vcis_tidcrid_pair_list_t; + +typedef vci_RelationPair vci_TidCridRelations; + +/* initialize function */ +extern void vci_InitializeTidCridUpdateLists(vci_MainRelHeaderInfo *info); +extern void 
vci_InitializeTidCridTree(vci_MainRelHeaderInfo *info); + +/* TIDCRID Tree access functions */ +extern void vci_OpenTidCridRelations(vci_TidCridRelations *rel, + vci_MainRelHeaderInfo *info, + LOCKMODE lockmode); +extern void vci_CloseTidCridRelations(vci_TidCridRelations *rel, LOCKMODE lockmode); + +#endif /* VCI_TIDCRID_H */ diff --git a/contrib/vci/include/vci_xact.h b/contrib/vci/include/vci_xact.h new file mode 100644 index 0000000..0aa4502 --- /dev/null +++ b/contrib/vci/include/vci_xact.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * vci_xact.h + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_xact.h + *------------------------------------------------------------------------- + */ + +#ifndef VCI_XACT_H +#define VCI_XACT_H + +struct vci_MainRelHeaderInfo; + +extern int64 vci_GenerateXid64(TransactionId target_xid, struct vci_MainRelHeaderInfo *info); + +#endif /* VCI_XACT_H */ diff --git a/contrib/vci/readme.txt b/contrib/vci/readme.txt new file mode 100644 index 0000000..68395c0 --- /dev/null +++ b/contrib/vci/readme.txt @@ -0,0 +1,86 @@ +Columnar storage +================ + +Because of two different storage aspects to support a +proper columnar storage without affecting the performance +of write operations and also by providing a good performance +improvement to read operations, the best way to achieve it +is by splitting storage into two different types. + +Write optimized storage +======================= + +Write optimized storage is the place where the data of all columns +that are part of columnar storage are stored in a row-wise format. +All the newly added/deleted data is stored in WOS relation with +xmin/xmax information also. If user wants to update/delete the +newly added data, it doesn't affect the performance much compared to +deleting the data from columnar storage.
+ +The tuples which don't have multiple copies or frozen data will be moved +from WOS to ROS periodically by the background worker process called +WOS to ROS converter process. Every column data is stored separately +in its own relation file. No transaction information is present +in ROS. The data in ROS can be referred with tuple ID. + + +Read optimized storage +====================== + +This is the place where all the column data is stored in columnar format. +The data from WOS to ROS is converted by background workers continuously based +on the tuple visibility check. Whenever a tuple is frozen, it gets moved +from WOS to ROS. + +The data in ROS is stored in extents. One extent consists of 262,144 rows. + + + +Local ROS +========= + +During each query execution, Data WOS & Whiteout WOS corresponding to the +columnar storage table will be converted into Local ROS. Life of Local ROS +ends with each query execution. Extent ID of Local ROS is less than any +other extent IDs of ROS. + + +Column store relations +====================== + +The relations and its relation +1. VCI Main relation + +VCI Main relation data: + +1. ROS version number +2. All the other relation OID details +3. Number of columns +4. Number of nullable columns +5. Null width +6. Column info offset +7. Extent info offset +8. Number of extents + +WOS relations +1. WOS data relation - Relation where the column store data gets inserted in row store format. +2. white out WOS relation - Relation used to store the removed tuples. + +ROS relations +1. TID data relation +2. TID metadata relation +3. NULL data relation +4. NULL metadata relation +5. Delete data relation +6. Delete metadata relation +7. Column relations +8. TID-CRID metadata relation +9. TID-CRID data relation +10. TID-CRID update relation one +11.
TID-CRID update relation two + + + + + + diff --git a/contrib/vci/storage/Makefile b/contrib/vci/storage/Makefile new file mode 100644 index 0000000..35d34b6 --- /dev/null +++ b/contrib/vci/storage/Makefile @@ -0,0 +1,30 @@ +# contrib/vci/storage/Makefile + +SUBOBJS = \ + vci_columns.o \ + vci_freelist.o \ + vci_index.o \ + vci_ros.o \ + vci_tidcrid.o \ + vci_xact.o + +EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) + +PG_CPPFLAGS = -I $(top_srcdir)/contrib/vci/include + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/vci/storage +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +override CFLAGS += $(CFLAGS_SL) + +all: SUBSYS.o + +SUBSYS.o: $(SUBOBJS) + $(LD) $(LDREL) $(LDOUT) $@ $^ diff --git a/contrib/vci/storage/vci_columns.c b/contrib/vci/storage/vci_columns.c new file mode 100644 index 0000000..7dc5c88 --- /dev/null +++ b/contrib/vci/storage/vci_columns.c @@ -0,0 +1,290 @@ +/*------------------------------------------------------------------------- + * + * vci_columns.c + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_columns.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/xact.h" +#include "vci.h" +#include "vci_ros.h" +#include "vci_columns.h" +#include "vci_freelist.h" + +/** + * function to cast from Page to (vcis_column_meta_t *). 
+ */ +#define vci_GetColumnMetaT(page) \ + ((vcis_column_meta_t *) &((page)[VCI_MIN_PAGE_HEADER])) + + +vcis_column_meta_t * +vci_GetColumnMeta(Buffer *buffer, Relation rel) +{ + Page page; + + *buffer = vci_ReadBufferWithPageInit(rel, VCI_COLUMN_META_HEADER_PAGE_ID); + page = BufferGetPage(*buffer); + + return vci_GetColumnMetaT(page); +} + +static void +GetColumnOids(Oid *metaOid, + Oid *dataOid, + vci_MainRelHeaderInfo *info, + int16 columnId) +{ + switch (columnId) + { + case VCI_COLUMN_ID_DELETE: + *metaOid = vci_GetMainRelVar(info, vcimrv_delete_meta_oid, 0); + *dataOid = vci_GetMainRelVar(info, vcimrv_delete_data_oid, 0); + break; + case VCI_COLUMN_ID_CRID: + *metaOid = InvalidOid; + *dataOid = InvalidOid; + break; + case VCI_COLUMN_ID_TID: + *metaOid = vci_GetMainRelVar(info, vcimrv_tid_meta_oid, 0); + *dataOid = vci_GetMainRelVar(info, vcimrv_tid_data_oid, 0); + break; + case VCI_COLUMN_ID_NULL: + *metaOid = vci_GetMainRelVar(info, vcimrv_null_meta_oid, 0); + *dataOid = vci_GetMainRelVar(info, vcimrv_null_data_oid, 0); + break; + default: + { + vcis_m_column_t *colInfo = vci_GetMColumn(info, columnId); + *metaOid = colInfo->meta_oid; + *dataOid = colInfo->data_oid; + break; + } + } +} + +void +vci_OpenColumnRelations(vci_ColumnRelations *rel, + vci_MainRelHeaderInfo *info, + int16 columnId, + LOCKMODE lockmode) +{ + Oid metaOid; + Oid dataOid; + + GetColumnOids(&metaOid, &dataOid, info, columnId); + rel->meta = heap_open(metaOid, lockmode); + rel->data = heap_open(dataOid, lockmode); + + rel->info = info; +} + +void +vci_CloseColumnRelations(vci_ColumnRelations *rel, LOCKMODE lockmode) +{ + if (rel) + { + if (RelationIsValid(rel->data)) + heap_close(rel->data, lockmode); + if (RelationIsValid(rel->meta)) + heap_close(rel->meta, lockmode); + } +} + +/** + * @brief Get attribute number from the name. + * @param[in] desc The tuple descriptor of the relation. + * @param[in] name The name of attribute. + * @return The attribute number. 
+ * If the name is not found in the descriptor, InvalidAttrNumber is returned. + */ +static AttrNumber +GetAttNum(TupleDesc desc, const char *name) +{ + int aId; + + for (aId = 0; aId < desc->natts; ++aId) + { + if (!strcmp(name, desc->attrs[aId]->attname.data)) + return aId + 1; + } + + return InvalidAttrNumber; +} + +static void +InitColumnMetaRelation(vci_ColumnRelations *relPair, + Form_pg_attribute attr, + vcis_compression_type_t compType, + TupleDesc heapTupleDesc) +{ + vcis_column_meta_t *columnMeta; + BlockNumber firstBlockNumber = VCI_COLUMN_DATA_FIRST_PAGE_ID; + + vci_FormatPageWithOneItem(relPair->meta, VCI_COLUMN_META_HEADER_PAGE_ID); + + columnMeta = vci_GetColumnMeta(&relPair->bufMeta, relPair->meta); + LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE); + + /** + * + * vcis_attribute_type_delete_vec, + * vcis_attribute_type_null_vec, + * vcis_attribute_type_tid, + * vcis_attribute_type_pgsql + * + * columnMeta->vcis_attr_type = XXXXXX ; + * + */ + + if (attr) + { /* normal columns */ + columnMeta->pgsql_atttypid = attr->atttypid; + columnMeta->pgsql_attnum = GetAttNum(heapTupleDesc, attr->attname.data); + columnMeta->pgsql_attlen = attr->attlen; + columnMeta->pgsql_atttypmod = attr->atttypmod; + + if (InvalidAttrNumber == columnMeta->pgsql_attnum) + ereport(ERROR, (errmsg("column missed in VCI index creation"), + errhint("This must never happen. 
" + "Give up to use VCI index."))); + } + else + { /* delete, null, or tid */ + columnMeta->pgsql_atttypid = InvalidOid; + columnMeta->pgsql_attlen = 0; + columnMeta->pgsql_atttypmod = 0; + } + + columnMeta->num_extents = 0; + columnMeta->num_extents_old = 0; + columnMeta->free_page_begin_id = firstBlockNumber; + columnMeta->free_page_begin_id_old = firstBlockNumber; + columnMeta->free_page_end_id = firstBlockNumber; + columnMeta->free_page_end_id_old = firstBlockNumber; + columnMeta->free_page_prev_id = InvalidBlockNumber; + columnMeta->free_page_next_id = InvalidBlockNumber; + columnMeta->num_free_pages = 1; + columnMeta->num_free_pages_old = 1; + columnMeta->num_free_page_blocks = 1; + columnMeta->num_free_page_blocks_old = 1; + columnMeta->min_max_field_size = 0; + columnMeta->min_max_content_size = 0; + columnMeta->latest_common_dict_id = VCI_INVALID_DICTIONARY_ID; + + if (vci_UseDictionary(compType)) + { + columnMeta->num_common_dicts = VCI_NUM_COMMON_DICT; + columnMeta->common_dict_info_offset = offsetof(vcis_column_meta_t, + common_dict_info); + columnMeta->block_number_extent_offset = offsetof(vcis_column_meta_t, + common_dict_info) + + sizeof(vcis_c_common_dict_t) * + VCI_NUM_COMMON_DICT; + } + else + { + columnMeta->num_common_dicts = 0; + columnMeta->common_dict_info_offset = 0; + columnMeta->block_number_extent_offset = offsetof(vcis_column_meta_t, + common_dict_info); + } + + vci_WriteColumnMetaDataHeader(relPair->meta, relPair->bufMeta); + UnlockReleaseBuffer(relPair->bufMeta); +} + +static void +InitDeleteVectorRelation(vci_ColumnRelations *relPair) +{ + OffsetNumber oNum; + + vci_FormatPageWithItems(relPair->data, + VCI_COLUMN_DATA_FIRST_PAGE_ID, + VCI_ITEMS_IN_PAGE_FOR_DELETE); + relPair->bufData = ReadBuffer(relPair->data, VCI_COLUMN_DATA_FIRST_PAGE_ID); + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + + for (oNum = FirstOffsetNumber; + oNum <= VCI_ITEMS_IN_PAGE_FOR_DELETE; + ++oNum) + vci_WriteItem(relPair->data, relPair->bufData, oNum); 
+ + UnlockReleaseBuffer(relPair->bufData); +} + +static void +InitColumnDataRelation(vci_ColumnRelations *relPair) +{ + vcis_free_space_t *freeSpace; + + vci_FormatPageWithOneItem(relPair->data, VCI_COLUMN_DATA_FIRST_PAGE_ID); + + freeSpace = vci_GetFreeSpace((vci_RelationPair *) relPair, VCI_COLUMN_DATA_FIRST_PAGE_ID); + freeSpace->size = MaxBlockNumber; + freeSpace->type = vcis_free_space; + freeSpace->prev_pos = InvalidBlockNumber; + freeSpace->next_pos = InvalidBlockNumber; + + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + vci_WriteOneItemPage(relPair->data, relPair->bufData); + UnlockReleaseBuffer(relPair->bufData); +} + +/** + * + * - Delete Vector + * - TID + * - NULL bit Vector + * + * @param[in] heapRel Indexed relation of original PostgreSQL table. + */ +void +vci_InitializeColumnRelations(vci_MainRelHeaderInfo *info, + TupleDesc tupdesc, + Relation heapRel) +{ + const LOCKMODE lockmode = ShareLock; + int16 colId; + TupleDesc heapTupleDesc = RelationGetDescr(heapRel); + + Assert((INT64CONST(0xFFFFFFFFFFFF0000) & tupdesc->natts) == 0); + + for(colId = VCI_COLUMN_ID_DELETE; colId < (int16) tupdesc->natts; ++colId) + { + vci_ColumnRelations relPairData; + vci_ColumnRelations *relPair = &relPairData; + + Form_pg_attribute attr; + vcis_compression_type_t compType; + + if (colId >= VCI_FIRST_NORMALCOLUMN_ID) + { + attr = tupdesc->attrs[colId]; + compType = vci_GetMColumn(info, colId)->comp_type; + } + else + { + attr = NULL; + compType = vcis_compression_type_fixed_raw; + } + + vci_OpenColumnRelations(relPair, info, colId, lockmode); + InitColumnMetaRelation(relPair, attr, compType, heapTupleDesc); + + if (colId == VCI_COLUMN_ID_DELETE) + { + InitDeleteVectorRelation(relPair); + } + else + { + InitColumnDataRelation(relPair); + } + vci_CloseColumnRelations(relPair, lockmode); + } +} diff --git a/contrib/vci/storage/vci_freelist.c b/contrib/vci/storage/vci_freelist.c new file mode 100644 index 0000000..b440bdc --- /dev/null +++ 
b/contrib/vci/storage/vci_freelist.c @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------- + * + * vci_freelist.c + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_freelist.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "vci.h" +#include "vci_freelist.h" + +static vcis_free_space_t *GetFreeSpaceT(Page page); + +/** + * function to cast from Page to (vcis_free_space_t *): returns the address t_hoff bytes past the tuple header of item VCI_FREESPACE_ITEM_ID + */ +static vcis_free_space_t * +GetFreeSpaceT(Page page) +{ + HeapTupleHeader htup; + htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, VCI_FREESPACE_ITEM_ID)); + + return (vcis_free_space_t *) ((char *) htup + htup->t_hoff); +} + +vcis_free_space_t * +vci_GetFreeSpace(vci_RelationPair *relPair, BlockNumber blk) +{ + Page page; + + relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, blk); + page = BufferGetPage(relPair->bufData); + + return GetFreeSpaceT(page); +} + + + diff --git a/contrib/vci/storage/vci_index.c b/contrib/vci/storage/vci_index.c new file mode 100644 index 0000000..83f3273 --- /dev/null +++ b/contrib/vci/storage/vci_index.c @@ -0,0 +1,1098 @@ +/*------------------------------------------------------------------------- + * + * vci_index.c + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_index.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/multixact.h" +#include "access/xact.h" +#include "access/nbtree.h" +#include "catalog/catalog.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "catalog/storage.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include
"nodes/relation.h" +#include "rewrite/rewriteRemove.h" +#include "rewrite/rewriteSupport.h" +#include "storage/predicate.h" +#include "storage/smgr.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" +#include "vci.h" +#include "vci_columns.h" +#include "vci_xact.h" + +#ifdef WIN32 +#define __func__ __FUNCTION__ +#endif + +#ifdef HAVE_DESIGNATED_INITIALIZERS +#define SFINIT(f, ...) f = __VA_ARGS__ +#else +#define SFINIT(f, ...) __VA_ARGS__ +#endif + +/** + * Data Relation + */ +#define VCI_RELTYPE_DATA ('d') + +/** + * Meta Relation + */ +#define VCI_RELTYPE_META ('m') + +/** + * WOS Relation + */ +#define VCI_RELTYPE_WOS ('W') + +/** + * ROS Relation + */ +#define VCI_RELTYPE_ROS ('R') + +/** + * TIDCRID Relation + */ +#define VCI_RELTYPE_TIDCRID ('T') + +/* local functions */ +static IndexBuildResult *vci_inner_build(Relation, Relation, IndexInfo *); +static char relNameBuf[NAMEDATALEN]; +static bool copy_with_freeze_option; +bool vci_is_in_vci_create_extension; + +IndexBuildResult *vci_build(Relation heap, Relation index, IndexInfo *indexInfo); +void vci_buildempty(Relation index); +bool vci_insert(Relation rel, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique); +IndexBulkDeleteResult * vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state); +IndexBulkDeleteResult *vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +bool vci_canreturn(Relation index, int attno); +void vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count,Cost *indexStartupCost, Cost *indexTotalCost,Selectivity *indexSelectivity, double *indexCorrelation); +bytea *vci_options(Datum reloptions, bool validate); +IndexScanDesc vci_beginscan(Relation rel, int nkeys, int norderbys); +void vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +bool vci_gettuple(IndexScanDesc scan, 
ScanDirection dir); +bool vci_validate(Oid opclassoid); +int64 vci_getbitmap(IndexScanDesc scan, TIDBitmap *tbm); +void vci_endscan(IndexScanDesc scan); +void vci_markpos(IndexScanDesc scan); +void vci_restrpos(IndexScanDesc scan); + +bool +vci_isVciIndexRelation(Relation rel) +{ + char *amname = NULL; + + if (rel->rd_rel->relkind == RELKIND_INDEX) + { + amname = get_am_name(rel->rd_rel->relam); + return strcmp(amname, VCI_STRING) == 0; + } + + return false; +} + +bool +vci_isVciAdditionalRelation(Relation rel) +{ + return vci_isVciAdditionalRelationTuple(rel->rd_id, rel->rd_rel); +} + +bool +vci_isVciAdditionalRelationTuple(Oid reloid, Form_pg_class reltuple) +{ + if (reltuple->relkind == RELKIND_CSTORE) + { + int ret; + int dummy1; + int dummy2; + char dummy3; + + ret = sscanf(NameStr(reltuple->relname), VCI_INTERNAL_RELATION_TEMPLATE, + &dummy1, &dummy2, &dummy3); + + if (ret == 3) + { + Oid ruleId = get_rewrite_oid(reloid, NameStr(reltuple->relname), true); + + return OidIsValid(ruleId); + } + } + + return false; +} + +/* custom index */ + +/** + * vci_build + */ + IndexBuildResult * +vci_build(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + + result = vci_inner_build(heap, index, indexInfo); + + return result; +} + +/** + * vci_buildempty + */ +void +vci_buildempty(Relation index) +{ + return; +} + + +/** + * vci_insert + */ +bool +vci_insert(Relation rel, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique) +{ + return false; +} + +/** + * vci_bulkdelete + */ +IndexBulkDeleteResult * +vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + return stats; +} + +/** + * vci_vacuumcleanup + */ +IndexBulkDeleteResult * +vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + if (info->analyze_only) + { + return stats; + } + + return stats; +} + +/** + * vci_canreturn + */ +bool 
+vci_canreturn(Relation index, int attno) +{ + return false; +} + +/** + * vci_costestimate - reports the maximum cost so this path compares as the most expensive + */ +void +vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation) +{ + /* always return worst cost value */ + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 1.0; + *indexCorrelation = 0.0; + + return; +} + +/** + * vci_options - always returns NULL + */ +bytea * +vci_options(Datum reloptions, bool validate) +{ + return NULL; +} + +bool +vci_validate(Oid opclassoid) +{ + return true; +} + +/* LCOV_EXCL_START */ +/** + * vci_beginscan - not supported; reports PANIC when called + */ +IndexScanDesc +vci_beginscan(Relation rel, int nkeys, int norderbys) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return NULL; +} + +/** + * vci_rescan - does nothing + */ +void +vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys) +{ + /* intentionally empty */ + return; +} + +/** + * vci_gettuple - not supported; reports PANIC when called + */ +bool +vci_gettuple(IndexScanDesc scan, ScanDirection dir) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return false; +} + +/** + * vci_getbitmap - not supported; reports PANIC when called + */ +int64 +vci_getbitmap(IndexScanDesc scan, TIDBitmap *tbm) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return 0; +} + +/** + * vci_endscan - not supported; reports PANIC when called + */ +void +vci_endscan(IndexScanDesc scan) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return; +} + +/** + * vci_markpos - not supported; reports PANIC when called + */ +void +vci_markpos(IndexScanDesc scan) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return; +} + +/** + * 
vci_restrpos + */ +void +vci_restrpos(IndexScanDesc scan) +{ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", PG_FUNCNAME_MACRO))); + + return; +} + + +/* --body-- */ + +static Oid +vci_create_relation(const char *rel_identifier, Relation indexRel, IndexInfo *indexInfo, char vci_reltype) +{ + int natts; + int i; + + /* system catalog relation id */ + Relation pg_class; + Relation pg_attr; + + /* new rel, oid, tupdesc */ + Relation new_rel; + Oid new_oid; + TupleDesc new_tupdesc; + + /* attributes */ + Oid ownerid = GetUserId(); + + char relkind = RELKIND_CSTORE; + Oid new_type_oid = InvalidOid; + Oid reloftypeid = InvalidOid; + CatalogIndexState indstate; + + char relname[NAMEDATALEN]; /* max 64 characters */ + Oid reltablespace; + Oid relnamespace; + char relpersistence; + + /* variables for pg_class */ + Form_pg_class new_rel_reltup; + + RangeVar *relrv; + + /* Insert pg_depend table */ + ObjectAddress oaIndex; + ObjectAddress oaNewRel; + + relnamespace = indexRel->rd_rel->relnamespace; + reltablespace = indexRel->rd_rel->reltablespace; + relpersistence = indexRel->rd_rel->relpersistence; + + /* function start */ + memset(relname, 0, sizeof(relname)); + strncpy(relname, rel_identifier, sizeof(relname)); + + /* + */ + relrv = makeRangeVar(get_namespace_name(relnamespace), relname, -1); + new_oid = RangeVarGetRelid(relrv, AccessShareLock, true); + + if (OidIsValid(new_oid)) + { + new_rel = relation_open(new_oid, AccessExclusiveLock); + RelationSetNewRelfilenode(new_rel, new_rel->rd_rel->relpersistence,RecentXmin, GetOldestMultiXactId()); + if (new_rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + heap_create_init_fork(new_rel); + + heap_close(new_rel, NoLock); /* do not unlock till end of xact */ + + return new_oid; + } + + /* Generate Data WOS */ + pg_class = heap_open(RelationRelationId, RowExclusiveLock); + + /* 4.6.1 get new Oid for new relation */ + + new_oid = 
GetNewRelFileNode(reltablespace, pg_class, relpersistence); + + /* The following line is meaningful? + * Or shoud we remove it? + */ + get_user_default_acl(ACL_OBJECT_RELATION, ownerid, relnamespace); + + /* 4.6.1.2 create new relation cache entry */ + + /* new tuple descriptor has TID column */ + + switch (vci_reltype) + { + /* WOS */ + case VCI_RELTYPE_WOS: + natts = 2; + new_tupdesc = CreateTemplateTupleDesc(natts, false);/* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "orignal_tid", TIDOID, -1, 0); + TupleDescInitEntry(new_tupdesc, (AttrNumber) 2, "xid", INT8OID, -1, 0); + break; + + /* ROS */ + case VCI_RELTYPE_ROS: + natts = 1; + new_tupdesc = CreateTemplateTupleDesc(natts, false);/* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0);/* */ + break; + + /* TIC-CRID */ + case VCI_RELTYPE_TIDCRID: + natts = 1; + new_tupdesc = CreateTemplateTupleDesc(natts, false);/* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0);/* */ + break; + + /* LCOV_EXCL_START */ + default: + elog(ERROR, "unexpected vci_reltype"); + break; + /* LCOV_EXCL_STOP */ + } + + /* + * Create the relcache entry (mostly dummy at this point) and the physical + * disk file. (If we fail further down, it's the smgr's responsibility to + * remove the disk file again.) 
+ */ + new_rel = RelationBuildLocalRelation(relname, + relnamespace, + new_tupdesc, + new_oid, + new_oid, /* relfilenode */ + reltablespace, + false, /* shared_relation*/ + false, /* mapped_relation */ + relpersistence, + relkind); + + /* 4.6.1.3 create new starge for new relation */ + RelationOpenSmgr(new_rel); + RelationCreateStorage(new_rel->rd_node, relpersistence); + + Assert(new_oid == RelationGetRelid(new_rel)); + + /* 4.6.1.4 add new entry into pg_class */ + new_rel_reltup = new_rel->rd_rel; + new_rel_reltup->relpages = 0; + new_rel_reltup->reltuples = 0; + new_rel_reltup->relallvisible = 0; + new_rel_reltup->relfrozenxid = RecentXmin; + new_rel_reltup->relminmxid = GetOldestMultiXactId(); + new_rel_reltup->relowner = ownerid; + new_rel_reltup->reltype = new_type_oid; + new_rel_reltup->reloftype = reloftypeid; + + /* + * @see https://www.postgresql.jp/document/9.4/html/catalog-pg-rewrite.html + */ + new_rel_reltup->relhasrules = false; + + new_rel->rd_att->tdtypeid = new_type_oid; + + InsertPgClassTuple(pg_class, new_rel, new_oid, (Datum) 0, (Datum) 0); + + /* 4.6.1.5 -now add tuples to pg_attribute for the attributes in our new relation. */ + + /* + * open pg_attribute and its indexes. + */ + pg_attr = heap_open(AttributeRelationId, RowExclusiveLock); + indstate = CatalogOpenIndexes(pg_attr); + + /* + * First we add the user attributes. This is also a convenient place to + * add dependencies on their datatypes and collations. + */ + for (i = 0; i < natts; i++) + { + /* variables for pg_attribute */ + Form_pg_attribute attr; + + attr = new_tupdesc->attrs[i]; + /* Fill in the correct relation OID */ + attr->attrelid = new_oid; + + /* Make sure these are OK? 
*/ + attr->attstattarget = -1; + attr->attcacheoff = -1; + attr->attstorage = 'p';/* 'p': Value must be stored plain always */ + + InsertPgAttributeTuple(pg_attr, attr, indstate); + } + + /* + * clean up pg_attribute + */ + CatalogCloseIndexes(indstate); + heap_close(pg_attr, RowExclusiveLock); + + oaIndex.classId = RelationRelationId; + oaIndex.objectId = indexRel->rd_id; + oaIndex.objectSubId = 0; + oaNewRel.classId = RelationRelationId; + oaNewRel.objectId = new_oid; + oaNewRel.objectSubId = 0; + + recordDependencyOn(&oaNewRel, &oaIndex, DEPENDENCY_INTERNAL); + + /* + * ok! the relation has been cataloged, so close our relations and return + * the OID of the newly created relation. + */ + + heap_close(new_rel, NoLock); /* do not unlock till end of xact */ + heap_close(pg_class, RowExclusiveLock); + + return new_oid; +} + +static char * +GenRelName(Relation rel, int16 columnId, char suffix) +{ + snprintf(relNameBuf, NAMEDATALEN, VCI_INTERNAL_RELATION_TEMPLATE, RelationGetRelid(rel), + (0xFFFF & columnId), suffix); + + return relNameBuf; +} + + +static void +CheckIndexedRelationKind(Relation rel) +{ + char relKind = get_rel_relkind(RelationGetRelid(rel)); + + if (relKind == RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support index on materialized view", VCI_STRING))); + + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support index on temporary table", VCI_STRING))); +} + +static void +CheckIndexInfo(IndexInfo *indexInfo, Relation indexRel) +{ + int i=0; + + /* check Concurrent option first. 
*/ + if (indexInfo->ii_Concurrent) + /* LCOV_EXCL_START */ + elog(PANIC, "should not reach here"); + /* LCOV_EXCL_STOP */ + + if (indexInfo->ii_Predicate != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support partial-index", VCI_STRING))); + + if (indexInfo->ii_Expressions != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support to CREATE INDEX on the expression", VCI_STRING))); + + if (indexInfo->ii_ExclusionOps != NULL || + indexInfo->ii_ExclusionProcs != NULL || + indexInfo->ii_ExclusionStrats != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support EXCLUDE clause", VCI_STRING))); + + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + AttrNumber an = indexInfo->ii_KeyAttrNumbers[i]; + int j; + + for (j = i+1 ; j < indexInfo->ii_NumIndexAttrs; j++) + { + if (an == indexInfo->ii_KeyAttrNumbers[j]) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("duplicated columns in vci index creation: %s", + NameStr(RelationGetDescr(indexRel)->attrs[indexInfo->ii_KeyAttrNumbers[j] - 1]->attname)), + errhint("duplicated columns are specified"))); + } + } + } +} + +static void +CheckIndexColumnTypes(TupleDesc tupdesc) +{ + int i; + + for (i = 0; i < tupdesc->natts; i++) + { + Oid typeoid = tupdesc->attrs[i]->atttypid; + + if (!vci_is_supported_type(typeoid)) + { + HeapTuple tuple; + Form_pg_type typetuple; + + tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for type %u", typeoid); + + typetuple = (Form_pg_type) GETSTRUCT(tuple); + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("data type %s is not supported for access method \"%s\"", + NameStr(typetuple->typname), VCI_STRING))); + + ReleaseSysCache(tuple); + } + } +} + +static IndexBuildResult * +vci_inner_build(Relation 
heapRel, Relation indexRel, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + Oid oid; + + vci_MainRelHeaderInfo *vmr_info; + + int i; + TupleDesc tupdesc; + + uint32 offsetToExtentInfo; + + CheckIndexedRelationKind(heapRel); + CheckIndexInfo(indexInfo, indexRel); + CheckIndexColumnTypes(RelationGetDescr(indexRel)); + + /* create VCI main relation */ + vmr_info = (vci_MainRelHeaderInfo *) palloc0(sizeof(vci_MainRelHeaderInfo)); + vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_ros_conv_build); + + if (RelationGetNumberOfBlocks(indexRel) != 0) + elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(indexRel)); + + /* create blank page * VCI_NUM_MAIN_REL_HEADER_PAGES */ + vci_PreparePagesWithOneItemIfNecessary(indexRel, + lengthof(vmr_info->buffer) - 1); + + vci_KeepMainRelHeaderWithoutVersionCheck(vmr_info); + + /* write ROS format version */ + vci_SetMainRelVar(vmr_info, vcimrv_ros_version_major, 0, + VCI_ROS_VERSION_MAJOR); + vci_SetMainRelVar(vmr_info, vcimrv_ros_version_minor, 0, + VCI_ROS_VERSION_MINOR); + + /* create WOS relations */ + /* register WOS relation's OID to VCI Main relation */ + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DATA_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS); + vci_SetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_WHITEOUT_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS); + vci_SetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0, oid); + + /* create ROS relations */ + + /* TID */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_tid_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_tid_meta_oid, 0, oid); + + /* NUll */ + oid = 
vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_NULL, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_null_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_NULL, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_null_meta_oid, 0, oid); + + /* Delete Vector */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_delete_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_delete_meta_oid, 0, oid); + + /* Column Stores */ + tupdesc = RelationGetDescr(indexRel); + vci_SetMainRelVar(vmr_info, vcimrv_num_columns, 0, tupdesc->natts); + for (i = 0; i < tupdesc->natts; i++) + { + Oid column_store_oid; + Oid column_meta_oid; + vcis_m_column_t *columnPointer; + + column_store_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + column_meta_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + + /* set ROS column pointer, */ + columnPointer = vci_GetMColumn(vmr_info, i); + + columnPointer->meta_oid = column_meta_oid; + columnPointer->data_oid = column_store_oid; + columnPointer->max_columns_size = vci_GetColumnWorstSize(tupdesc->attrs[i]); + if (tupdesc->attrs[i]->attlen == -1) + { + columnPointer->comp_type = vcis_compression_type_variable_raw; + } + else if (tupdesc->attrs[i]->attlen > 0) + { + columnPointer->comp_type = vcis_compression_type_fixed_raw; + } + else + { + Assert(false); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected attribute length"))); + } + /* put default extent(free_page) to each columns */ + } + vci_SetMainRelVar(vmr_info, 
vcimrv_num_nullable_columns, 0, + vci_GetNumberOfNullableColumn(vmr_info->rel)); + vci_SetMainRelVar(vmr_info, vcimrv_null_width_in_byte, 0, + (vci_GetNumberOfNullableColumn(vmr_info->rel) + BITS_PER_BYTE - 1) / + BITS_PER_BYTE); + + /* create TID-CRID relations */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_meta_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_data_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '0'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_0, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '1'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_1, 0, oid); + + /* other variables */ + vci_SetMainRelVar(vmr_info, vcimrv_column_info_offset, 0, vcimrv_column_info - VCI_MIN_PAGE_HEADER); + + offsetToExtentInfo = (vci_MRVGetBlockNumber(vcimrv_extent_info) * VCI_MAX_PAGE_SPACE) + + vci_MRVGetOffset(vcimrv_extent_info) - VCI_MIN_PAGE_HEADER; + + vci_SetMainRelVar(vmr_info, vcimrv_extent_info_offset, 0, offsetToExtentInfo); + vci_SetMainRelVar(vmr_info, vcimrv_size_mr, 0, offsetToExtentInfo); + vci_SetMainRelVar(vmr_info, vcimrv_size_mr_old, 0, offsetToExtentInfo); + + vci_SetMainRelVar(vmr_info, vcimrv_current_ros_version, 0, FrozenTransactionId); + vci_SetMainRelVar(vmr_info, vcimrv_last_ros_version, 0, FrozenTransactionId); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel, 0, 0); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel_old, 0, 0); + + vci_SetMainRelVar(vmr_info, vcimrv_xid_generation, 0, 1); /* xid generation starts from 1 */ + vci_SetMainRelVar(vmr_info, 
vcimrv_xid_gen_udpate_xid, 0, GetCurrentTransactionId());
+
+    vci_SetMainRelVar(vmr_info, vcimrv_ros_command, 0, vci_rc_invalid);
+    vci_SetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0, 0);
+
+    vci_SetMainRelVar(vmr_info, vcimrv_num_extents, 0, 0);
+    vci_SetMainRelVar(vmr_info, vcimrv_num_extents_old, 0, 0);
+
+    /* flush all header pages (vci_wmrv_all) */
+    vci_WriteMainRelVar(vmr_info, vci_wmrv_all);
+
+    /* initialize meta data relations and data relations of the columns */
+    vci_InitializeColumnRelations(vmr_info, tupdesc, heapRel);
+
+    /* initialize the TID-CRID update lists and the TID-CRID tree */
+    vci_InitializeTidCridUpdateLists(vmr_info);
+    vci_InitializeTidCridTree(vmr_info);
+
+    /* release the header buffers and the header-info structure */
+    vci_ReleaseMainRelHeader(vmr_info);
+    pfree(vmr_info);
+
+    /* create statistics for return to caller */
+    result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
+    result->heap_tuples = 0.0;
+    result->index_tuples = 0;
+
+    return result;
+}
+
+/**
+ * vci_add_index_delete
+ *
+ * For every VCI index defined on heapRel, insert one row
+ * (heap_tid, 64-bit xid) into that index's Whiteout WOS relation.
+ * Indexes for which vci_isVciIndexRelation() is false are skipped.
+ *
+ * heapRel:  the indexed heap relation
+ * heap_tid: TID stored in the first column of the Whiteout WOS row
+ * xmin:     xid to record; when it equals FrozenTransactionId the current
+ *           transaction id is recorded instead
+ */
+void
+vci_add_index_delete(Relation heapRel, ItemPointer heap_tid, TransactionId xmin)
+{
+    List *indexoidlist;
+    ListCell *l;
+
+    /* Fast path if definitely no indexes */
+    if (!RelationGetForm(heapRel)->relhasindex)
+        return;
+
+    /*
+     * Get cached list of index OIDs
+     */
+    indexoidlist = RelationGetIndexList(heapRel);
+
+    /* Iterate over the indexes */
+    foreach(l, indexoidlist)
+    {
+        Oid indexOid = lfirst_oid(l);
+        Relation indexRel;
+
+        Oid whiteoutWosOid;
+        Relation whiteoutWOSRel;
+        Datum new_values[2];
+        bool new_isnull[2];
+        HeapTuple htup;
+        TupleDesc tdesc;
+
+        /* header info lives on the stack; vmr_info is just a handle to it */
+        vci_MainRelHeaderInfo vmr_info_data;
+        vci_MainRelHeaderInfo *vmr_info = &vmr_info_data;
+
+        TransactionId xid;
+
+        /* Skip if Index is NOT VCI index */
+        indexRel = index_open(indexOid, RowExclusiveLock);
+        if (!vci_isVciIndexRelation(indexRel))
+        {
+            index_close(indexRel, RowExclusiveLock);
+            continue;
+        }
+
+        /* read the header pages (with ROS version check) */
+        vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_delete);
+        vci_KeepMainRelHeader(vmr_info);
+
+        /* Open Whiteout WOS */
+        whiteoutWosOid = (Oid)
vci_GetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0); + whiteoutWOSRel = heap_open(whiteoutWosOid, RowExclusiveLock); + + tdesc = RelationGetDescr(whiteoutWOSRel); + + /* @see generateXidDiff() in vci_ros_command.c */ + if (!TransactionIdEquals(xmin, FrozenTransactionId)) + xid = xmin; + else + xid = GetCurrentTransactionId(); + + /* create new tuple for insert */ + new_values[0] = ItemPointerGetDatum(heap_tid); + new_values[1] = Int64GetDatum(vci_GenerateXid64(xid, vmr_info)); + new_isnull[0] = false; + new_isnull[1] = false; + + htup = heap_form_tuple(tdesc, new_values, new_isnull); + + /* insert TID into Whiteout WOS */ + simple_heap_insert(whiteoutWOSRel, htup); + heap_freetuple(htup); + heap_close(whiteoutWOSRel, RowExclusiveLock); + + /* flush & unlock */ + vci_ReleaseMainRelHeader(vmr_info); + + index_close(indexRel, RowExclusiveLock); + } + + list_free(indexoidlist); +} + +bool +vci_add_drop_relation(Oid oid, int flags) +{ + Relation rel; + Oid ruleId; + char relKind = get_rel_relkind(oid); + bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) + == PERFORM_DELETION_CONCURRENTLY); + + if (relKind == RELKIND_INDEX) + { + rel = relation_open(oid, AccessExclusiveLock); + + if (!vci_isVciIndexRelation(rel)) + { + relation_close(rel, NoLock); + return false; + } + relation_close(rel, NoLock); + + /* + * Deletion of VCI index by ALTER TABLE command is not supported + * + * Ereport only if the relation is vci main relation so that + * it does not give unneccesary messages. + * + * Return true when so that the post-processing does not continue. 
+ */ + if (vci_rebuild_command == vcirc_alter_table) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter table because the table is indexed by VCI"), + errhint("You must drop index \"%s\" before using this command.", RelationGetRelationName(rel)))); + } + + if (concurrent) + elog(PANIC, "should not reach here"); + + index_drop(oid, concurrent); + + } + else + { + /* SD 4.6.3 */ + rel = relation_open(oid, AccessExclusiveLock); + + if (!vci_isVciAdditionalRelation(rel)) + { + relation_close(rel, NoLock); + return false; + } + + if (concurrent) + elog(PANIC, "should not reach here"); + + /* 2.1 Is relation used? */ + CheckTableNotInUse(rel, "DROP TABLE"); + CheckTableForSerializableConflictIn(rel); + + ruleId = get_rewrite_oid(oid, rel->rd_rel->relname.data, true); + + /* 2.2 Drop relation storage */ + RelationDropStorage(rel); + + relation_close(rel, NoLock); + remove_on_commit_action(oid); + + /* 2.3 release relation cache */ + RelationForgetRelation(oid); + + /* 2.4 remove statistic info */ + RemoveStatistics(oid, 0); + + /* 2.5 remove pg_rewrite entry */ + if (ruleId != InvalidOid) + RemoveRewriteRuleById(ruleId); + + /* 2.6 remove pg_attributes entry*/ + DeleteAttributeTuples(oid); + + /* 2.7 remove pg_system entry */ + DeleteRelationTuple(oid); + + } + + return true; +} + +bool +vci_add_skip_vci_index(Relation indexRel) +{ + return vci_isVciIndexRelation(indexRel); +} + + +/* + * Process Utility Hook + */ + +/** + * parsetree: the parse tree for the utility statement + * queryString: original source text of command + * context: identifies source of statement (toplevel client command, + * non-toplevel client command, subcommand of a larger utility command) + * params: parameters to use during execution + * dest: where to send results + * completionTag: points to a buffer of size COMPLETION_TAG_BUFSIZE + * in which to store a command completion status string. 
+ */
+void
+vci_process_utility(Node *parseTree, const char *queryString,
+                    ProcessUtilityContext context,
+                    ParamListInfo paramListInfo,
+                    DestReceiver *destReceiver, char *completionTag)
+{
+    /*
+     * NOTE(review): creating_vci_extension is initialized to false and is
+     * never assigned in this function, so the PG_TRY branch below is not
+     * taken as written - confirm whether detection of CREATE EXTENSION
+     * is meant to be added here.
+     */
+    bool creating_vci_extension = false;
+    volatile bool saved_vci_is_in_vci_create_extension;
+
+    /* remember the flag so it can be restored on every exit path */
+    saved_vci_is_in_vci_create_extension = vci_is_in_vci_create_extension;
+
+    if (creating_vci_extension)
+        vci_is_in_vci_create_extension = true;
+
+    /* reset per-statement state before running the statement */
+    vci_rebuild_command = vcirc_invalid;
+    copy_with_freeze_option = false;
+
+    if (creating_vci_extension)
+    {
+
+        /* restore the saved flag if the utility command raises an error */
+        PG_TRY();
+        {
+            /* chain to the previously installed hook, else the standard one */
+            if (process_utility_prev != NULL)
+                process_utility_prev(parseTree, queryString, context, paramListInfo,
+                                     destReceiver, completionTag);
+            else
+                standard_ProcessUtility(parseTree, queryString, context, paramListInfo,
+                                        destReceiver, completionTag);
+        }
+        PG_CATCH();
+        {
+            vci_is_in_vci_create_extension = saved_vci_is_in_vci_create_extension;
+
+            PG_RE_THROW();
+        }
+        PG_END_TRY();
+    }
+    else
+    {
+
+        /* chain to the previously installed hook, else the standard one */
+        if (process_utility_prev != NULL)
+            process_utility_prev(parseTree, queryString, context, paramListInfo,
+                                 destReceiver, completionTag);
+        else
+            standard_ProcessUtility(parseTree, queryString, context, paramListInfo,
+                                    destReceiver, completionTag);
+    }
+
+    vci_rebuild_command = vcirc_invalid;
+
+    vci_is_in_vci_create_extension = saved_vci_is_in_vci_create_extension;
+}
+
+/*
+ * VCI handler function: return IndexAmRoutine with access method parameters
+ * and callbacks.
+ */ +PG_FUNCTION_INFO_V1(vci_handler); + +Datum +vci_handler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 1; + amroutine->amsupport = 0; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = false; + amroutine->amcanbackward = false; + amroutine->amcanunique = false; + amroutine->amcanmulticol = true; + amroutine->amoptionalkey = false; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amkeytype = InvalidOid; + amroutine->ambuild = vci_build; + amroutine->ambuildempty = vci_buildempty; + amroutine->aminsert = vci_insert; + amroutine->ambulkdelete = vci_bulkdelete; + amroutine->amvacuumcleanup = vci_vacuumcleanup; + amroutine->amcanreturn = vci_canreturn; + amroutine->amcostestimate = vci_costestimate; + amroutine->amoptions = vci_options; + amroutine->amvalidate = vci_validate; + amroutine->ambeginscan = vci_beginscan; + amroutine->amrescan = vci_rescan; + amroutine->amgettuple = vci_gettuple; + amroutine->amgetbitmap = vci_getbitmap; + amroutine->amendscan = vci_endscan; + amroutine->ammarkpos = vci_markpos; + amroutine->amrestrpos = vci_restrpos; + + PG_RETURN_POINTER(amroutine); +} diff --git a/contrib/vci/storage/vci_ros.c b/contrib/vci/storage/vci_ros.c new file mode 100644 index 0000000..26e0cc3 --- /dev/null +++ b/contrib/vci/storage/vci_ros.c @@ -0,0 +1,624 @@ +/*------------------------------------------------------------------------- + * + * vci_ros.c + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_ros.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "access/heapam_xlog.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" /* for MAX_MULTIBYTE_CHAR_LEN */ +#include "miscadmin.h" +#include 
"utils/varbit.h" +#include "vci.h" +#include "vci_ros.h" +#include "vci_freelist.h" + +/** + * @brief Initialize the structure info to access the header of VCI main + * relation. + * This function "just" initialize the give object. + * To access the information in the header, keep the DB pages in buffer + * using vci_KeepMainRelHeader(). + * The accessors are vci_GetMainRelVar() and vci_SetMainRelVar(). + * After modifying the information, call vci_WriteMainRelVar() to write + * the page back to the storage. + * Finally to release the buffer, call vci_ReleaseMainRelHeader(). + * @param[out] info Pointer to the target vci_MainRelHeaderInfo, + * which will be initialized + * @param[in] rel VCI main relation. + * @param[in] command ROS command which uses this structure. + */ +void +vci_InitMainRelHeaderInfo(vci_MainRelHeaderInfo *info, + Relation rel, + vci_ros_command_t command) +{ + int aId; + + Assert(NULL != info); + info->rel = rel; + for (aId = 0; aId < lengthof(info->buffer); ++aId) + info->buffer[aId] = InvalidBuffer; + info->command = command; + info->num_extents_allocated = -1; +} + +static void +KeepMainRelHeader(vci_MainRelHeaderInfo *info) +{ + int blockNum; + + Assert(NULL != info); + Assert(NULL != info->rel); + for (blockNum = 0; blockNum < lengthof(info->buffer); ++blockNum) + info->buffer[blockNum] = vci_ReadBufferWithPageInit(info->rel, blockNum); +} + +static void +CheckRosVersion(vci_MainRelHeaderInfo *info) +{ + uint32 major = vci_GetMainRelVar(info, vcimrv_ros_version_major, 0); + uint32 minor = vci_GetMainRelVar(info, vcimrv_ros_version_minor, 0); + + if ((major == 0) && (minor == 0)) + ereport(ERROR, (errmsg("ROS has not been formated yet."), + errhint("This might happen when CREATE INDEX fails. " + "\"DROP INDEX %s;\" and CREATE INDEX again may help." 
, + RelationGetRelationName(info->rel)))); + + if ((VCI_ROS_VERSION_MAJOR != major) || (VCI_ROS_VERSION_MINOR != minor)) + ereport(ERROR, (errmsg("incompatible VCI version: expected (%d, %d), stored (%d, %d).", VCI_ROS_VERSION_MAJOR, VCI_ROS_VERSION_MINOR, major, minor), + errhint("This can happen when accessing old database with newer VCI modules. DROP and CREATE INDEX may help."))); +} + +static int32 +GetNumberOfExtentsFromSizeOfMainRelation(Relation rel) +{ + const int headerBlockNumber = vcimrv_extent_info >> VCI_MRV_PAGE_SHIFT; + const int maxExtentInfoInFirstPage = (BLCKSZ - + (vcimrv_extent_info & VCI_MRV_MASK_OFFSET)) / + sizeof(vcis_m_extent_t); + const int maxExtentInfoInPage = VCI_MAX_PAGE_SPACE / + sizeof(vcis_m_extent_t); + int numBlocks = RelationGetNumberOfBlocks(rel); + + if (numBlocks <= headerBlockNumber) + return -1; + + return ((numBlocks - (headerBlockNumber + 1)) * maxExtentInfoInPage) + + maxExtentInfoInFirstPage; +} + +static void +UpdateNumberOfExtentsInMainRelHeader(vci_MainRelHeaderInfo *info) +{ +#ifndef UNUSE_RECORDED_NUM_EXTENTS + if (vci_rc_query == info->command) + info->num_extents_allocated = GetNumberOfExtentsFromSizeOfMainRelation( + info->rel); + else +#endif /* #ifndef UNUSE_RECORDED_NUM_EXTENTS */ + info->num_extents_allocated = -1; +} + +/** + * @brief Keep DB pages of VCI header in buffer. + * This function acquire one read lock with AccessShareLock. + * This is called only by vci_inner_build(). + * @param[in] info Pointer to the target vci_MainRelHeaderInfo. + */ +void +vci_KeepMainRelHeaderWithoutVersionCheck(vci_MainRelHeaderInfo *info) +{ + Assert(info); + Assert(RelationIsValid(info->rel)); + ereport(DEBUG3, (errmsg("open VCI \"%s\" ignoring ROS version", + RelationGetRelationName(info->rel)))); + KeepMainRelHeader(info); +} + +/** + * @brief Keep DB pages of VCI header in buffer after checking the ROS version. + * This function acquire one read lock with AccessShareLock. 
+ * @param[in] info Pointer to the target vci_MainRelHeaderInfo. + */ +void +vci_KeepMainRelHeader(vci_MainRelHeaderInfo *info) +{ + Assert(info); + Assert(RelationIsValid(info->rel)); + ereport(DEBUG3, (errmsg("open VCI \"%s\"", + RelationGetRelationName(info->rel)))); + KeepMainRelHeader(info); + CheckRosVersion(info); + UpdateNumberOfExtentsInMainRelHeader(info); +} + +/** + * @brief Write header pages of VCI main relation. + * @param[in] info Pointer to the target vci_MainRelHeaderInfo. + * @param[in] writeArea Give vci_wmrv_update for updating the pages for + * recovery, or vci_wmrv_all for all pages. + * The latter should only be used in building the index. + */ +void +vci_WriteMainRelVar(vci_MainRelHeaderInfo *info, + vci_wmrv_t writeArea) +{ + int blockNum; + int start = 0; + + Assert(NULL != info); + Assert(NULL != info->rel); + + ereport(DEBUG3, (errmsg("flush header pages of VCI \"%s\" main relation", + RelationGetRelationName(info->rel)))); + + switch (writeArea) + { + case vci_wmrv_update: + start = lengthof(info->buffer) - 1; + break; + case vci_wmrv_all: + start = 0; + break; + default: + ereport(ERROR, (errmsg("internal error. unsupported parameter."), errhint("Disable VCI by 'SELECT vci_disable();'"))); + } + + for (blockNum = start; blockNum < lengthof(info->buffer); ++blockNum) + { + LockBuffer(info->buffer[blockNum], BUFFER_LOCK_EXCLUSIVE); + MarkBufferDirty(info->buffer[blockNum]); + vci_WriteOneItemPage(info->rel, info->buffer[blockNum]); + LockBuffer(info->buffer[blockNum], BUFFER_LOCK_UNLOCK); + } +} + +/** + * @brief Release buffer for the VCI header. + * This function release one read lock with AccessShareLock. + * @param[in] info Pointer to the target vci_MainRelHeaderInfo. 
+ */
+void
+vci_ReleaseMainRelHeader(vci_MainRelHeaderInfo *info)
+{
+    int idx = 0;
+
+    Assert(NULL != info);
+    Assert(NULL != info->rel);
+
+    ereport(DEBUG3, (errmsg("release VCI \"%s\"", RelationGetRelationName(info->rel))));
+
+    /* drop every header buffer and forget it */
+    while (idx < lengthof(info->buffer))
+    {
+        ReleaseBuffer(info->buffer[idx]);
+        info->buffer[idx] = InvalidBuffer;
+        ++idx;
+    }
+
+    /* the structure no longer refers to a relation */
+    info->rel = NULL;
+}
+
+/**
+ * @brief Store a uint32 value into the header part of VCI main relation.
+ *
+ * The variable's "virtual address" is split into a header block number and
+ * a byte offset inside that page; the value is written in place into the
+ * buffer held in info->buffer[].
+ *
+ * @param[in] info Pointer to the target vci_MainRelHeaderInfo.
+ * @param[in] var "virtual address" of the variable, defined in
+ * enum vci_MainRelVar.
+ * @param[in] elemId Index of the uint32 element counted from the variable's
+ * address; give 0 for scalar variables.
+ * @param[in] value The value to write.
+ */
+void
+vci_SetMainRelVar(vci_MainRelHeaderInfo *info,
+                  vci_MainRelVar var,
+                  int elemId,
+                  uint32 value)
+{
+    unsigned int blockNumber = vci_MRVGetBlockNumber(var);
+    unsigned int offset = vci_MRVGetOffset(var);
+    char *base;
+    uint32 *slot;
+
+    Assert(blockNumber < lengthof(info->buffer));
+    Assert(offset < BLCKSZ);
+
+    base = (char *) BufferGetPage(info->buffer[blockNumber]);
+    slot = (uint32 *) (base + offset);
+    slot[elemId] = value;
+}
+
+/**
+ * @brief Get values in the header part of VCI main relation.
+ * @param[in] info Pointer to the target vci_MainRelHeaderInfo.
+ * @param[in] var "virtual address" of the variable, defined in
+ * enum vci_MainRelVar.
+ * @param[in] elemId Give 0 normally.
+ * When the target variable has multiple of elements, say an array,
+ * the element ID should be placed.
+ * @return The gotten value.
+ */
+uint32
+vci_GetMainRelVar(vci_MainRelHeaderInfo *info,
+                  vci_MainRelVar var,
+                  int elemId)
+{
+    unsigned int blockNumber = vci_MRVGetBlockNumber(var);
+    unsigned int offset = vci_MRVGetOffset(var);
+    char *base;
+    uint32 *slot;
+
+    Assert(blockNumber < lengthof(info->buffer));
+    Assert(offset < BLCKSZ);
+
+    /* address of the variable inside the held header page */
+    base = (char *) BufferGetPage(info->buffer[blockNumber]);
+    slot = (uint32 *) (base + offset);
+
+    return slot[elemId];
+}
+
+/**
+ * @brief Compute the "virtual address" of a column's vcis_m_column_t entry
+ * in the VCI main relation.
+ *
+ * Entries start at vcimrv_column_info.  Entries that do not fit in that
+ * first page continue on the following pages, right after
+ * VCI_MIN_PAGE_HEADER bytes.
+ *
+ * @param[in] columnId The column ID in the VCI index.
+ * @return Block number (shifted by VCI_MRV_PAGE_SHIFT) plus the offset in
+ * the page, the offset including the DB page header part.
+ */
+vci_MainRelVar
+vci_GetMColumnPosition(int16 columnId)
+{
+    const int firstBlockNumber = vci_MRVGetBlockNumber(vcimrv_column_info);
+    const int numInFirstPage = (BLCKSZ - vci_MRVGetOffset(vcimrv_column_info)) /
+        sizeof(vcis_m_column_t);
+    const int numInPage = VCI_MAX_PAGE_SPACE / sizeof(vcis_m_column_t);
+    int blockNumber;
+    int slot;
+
+    Assert(VCI_FIRST_NORMALCOLUMN_ID <= columnId);
+
+    /* the entry lives in the page that holds vcimrv_column_info */
+    if (columnId < numInFirstPage)
+        return (firstBlockNumber << VCI_MRV_PAGE_SHIFT) +
+            vci_MRVGetOffset(vcimrv_column_info) +
+            (columnId * sizeof(vcis_m_column_t));
+
+    /* otherwise count whole pages past the first one */
+    slot = columnId - numInFirstPage;
+    blockNumber = 1 + firstBlockNumber + slot / numInPage;
+    slot %= numInPage;
+    Assert(blockNumber < (VCI_NUM_MAIN_REL_HEADER_PAGES - 1));
+
+    return (blockNumber << VCI_MRV_PAGE_SHIFT) +
+        VCI_MIN_PAGE_HEADER +
+        (slot * sizeof(vcis_m_column_t));
+}
+
+/**
+ * @brief Get the column information in the VCI main relation.
+ * @param[in] info Pointer to the target vci_MainRelHeaderInfo.
+ * @param[in] columnId The column ID in the VCI index.
+ * @return The pointer to the column information in the header page of
+ * VCI main relation.
+ *
+ * @note
+ * AFTER ACCESSING vcis_m_column_t, RELEASE BUFFER WITH ReleaseBuffer(buffer);
+ * NOTE(review): this function does not read a buffer itself; the returned
+ * pointer refers into a header page already held in info->buffer[] -
+ * confirm whether the note above is still accurate.
+ */
+vcis_m_column_t *
+vci_GetMColumn(vci_MainRelHeaderInfo *info, int16 columnId)
+{
+    Page page;
+    vci_MainRelVar mrv = vci_GetMColumnPosition(columnId);
+
+    page = BufferGetPage(info->buffer[vci_MRVGetBlockNumber(mrv)]);
+
+    return (vcis_m_column_t *) &(((char *) page)[vci_MRVGetOffset(mrv)]);
+}
+
+
+/*
+ * Call vci_WriteItem() for the items at offsets FirstOffsetNumber ..
+ * FirstOffsetNumber + numItems - 1 of the page in buffer.
+ */
+static void
+WriteAllItemsInPage(Relation rel,
+                    Buffer buffer,
+                    uint16 numItems)
+{
+    uint16 iId;
+
+    for (iId = 0; iId < numItems; ++ iId)
+        vci_WriteItem(rel, buffer, iId + FirstOffsetNumber);
+}
+
+/**
+ * @brief
+ * This function checks if the relation has the DB page with the page ID
+ * blockNumber.
+ * When it does not exist, the function extends the relation up to that
+ * block and initializes each added page with numItems items.
+ * When it exists, the page is initialized only if it is still new or
+ * forceInit is true.
+ * @param[in] rel The relation.
+ * @param[in] blockNumber The block number to be examined.
+ * An invalid block number raises an ERROR.
+ * @param[in] numItems The number of items the page is initialized with.
+ * @param[in] forceInit If true, the block is initialized anyway.
+ * @param[in] logItems If true, write all items in the pages into WAL.
+ */
+void
+vci_PreparePagesIfNecessaryCore(Relation rel,
+                                BlockNumber blockNumber,
+                                uint16 numItems,
+                                bool forceInit,
+                                bool logItems)
+{
+    BlockNumber existingPages = RelationGetNumberOfBlocks(rel);
+
+    Assert(0 < numItems);
+
+    if (!BlockNumberIsValid(blockNumber))
+        ereport(ERROR, (errmsg("data relation full"), errhint("Normally relations of VCI index are smaller than the table relation, therefore this error must not happen. Disable VCI by 'SELECT vci_disable();'")));
+
+    if (existingPages <= blockNumber)
+    {
+        BlockNumber pId;
+
+        /* extend the relation one page at a time up to blockNumber */
+        for (pId = existingPages; pId <= blockNumber; ++pId)
+        {
+            /* RBM_ZERO_AND_LOCK: the new page is returned zeroed and locked */
+            Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM,
+                                               P_NEW, RBM_ZERO_AND_LOCK, NULL);
+            vci_InitPageCore(buffer, numItems, true);
+            if (logItems)
+                WriteAllItemsInPage(rel, buffer, numItems);
+            UnlockReleaseBuffer(buffer);
+        }
+    }
+    else
+    {
+        Buffer buffer = ReadBuffer(rel, blockNumber);
+        Page page = BufferGetPage(buffer);
+        bool needUnlock = false;
+
+        if (PageIsNew(page) || forceInit)
+        {
+            /* locked = false: vci_InitPageCore locks and unlocks by itself */
+            vci_InitPageCore(buffer, numItems, false);
+
+            if (logItems)
+            {
+                /* take the lock again for the WAL write; released below */
+                LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+                WriteAllItemsInPage(rel, buffer, numItems);
+                needUnlock = true;
+            }
+        }
+        if (needUnlock)
+            UnlockReleaseBuffer(buffer);
+        else
+            ReleaseBuffer(buffer);
+    }
+}
+
+/**
+ * @brief
+ * This function write items of given number in the buffer,
+ * without page lock.
+ * @param[in] buffer Postgres DB buffer to be initialized.
+ * @param[in] numItems The number of items the page is initialized with.
+ */
+static void
+InitPageCoreWithoutLock(Buffer buffer, int16 numItems)
+{
+    uint32 size;
+    uint32 itemSize;
+    int32 aId;
+    Page page = BufferGetPage(buffer);
+    PageHeader pageHeader = (PageHeader) page;
+
+    PageInit(page, BLCKSZ, 0);
+    /* reserve numItems line pointers */
+    pageHeader->pd_lower += sizeof(ItemIdData) * numItems;
+    /* split the remaining space evenly, rounded down to the alignment */
+    size = pageHeader->pd_upper - pageHeader->pd_lower;
+    itemSize = vci_RoundDownValue(size / numItems,
+                                  VCI_DATA_ALIGNMENT_IN_STORAGE);
+    /* lay the items out from the end of the page, last item first */
+    for (aId = numItems; aId--; )
+    {
+        HeapTupleHeader hTup;
+
+        pageHeader->pd_upper -= itemSize;
+        pageHeader->pd_linp[aId].lp_off = pageHeader->pd_upper;
+        pageHeader->pd_linp[aId].lp_len = itemSize;
+        pageHeader->pd_linp[aId].lp_flags = LP_NORMAL;
+        /* give each item a minimal heap tuple header */
+        hTup = (HeapTupleHeader) PageGetItem(page, &(pageHeader->pd_linp[aId]));
+        hTup->t_infomask2 = 0;
+        hTup->t_infomask = HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID;
+        hTup->t_hoff = vci_RoundUpValue(offsetof(HeapTupleHeaderData, t_bits),
+                                        VCI_DATA_ALIGNMENT_IN_STORAGE);
+    }
+    MarkBufferDirty(buffer);
+    Assert(pageHeader->pd_lower <= pageHeader->pd_upper);
+}
+
+/**
+ * @brief
+ * This function write items of given number in the buffer.
+ * @param[in] buffer Postgres DB buffer to be initialized.
+ * @param[in] numItems The number of items the page is initialized with.
+ * @param[in] locked true if the caller already holds the buffer lock;
+ * when false, the buffer is locked exclusively for the initialization and
+ * unlocked afterwards.
+ */
+void
+vci_InitPageCore(Buffer buffer, int16 numItems, bool locked)
+{
+    if (!locked)
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    InitPageCoreWithoutLock(buffer, numItems);
+
+    if (!locked)
+        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
+
+/**
+ * @brief
+ * This function mark the buffer dirty, and make WAL from the item
+ * in the buffer.
+ * We assume that the relation is only modified by ROS command exclusively.
+ * So, we do not put strict lock here.
+ * @param[in] rel The relation.
+ * @param[in] buffer PostgreSQL DB buffer having the page data.
+ * @param[in] offsetNumber Offset number of the item in the page to be logged.
+ */ +void +vci_WriteItem(Relation rel, + Buffer buffer, + OffsetNumber offsetNumber) +{ + Page page = BufferGetPage(buffer); + ItemId tup = PageGetItemId(page, offsetNumber); + HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, tup); + + Assert(BufferIsValid(buffer)); + Assert(OffsetNumberIsValid(offsetNumber)); + + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(rel)) + { + xl_heap_inplace xlrec; + XLogRecPtr recptr; + uint8 info = 0; + uint32 newlen; + + xlrec.offnum = offsetNumber; + + /* originally taken from heap_inplace_update() + * in src/backend/access/heap/heapam.c + */ + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapInplace); + + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + newlen = VCI_ITEM_SPACE(PageGetMaxOffsetNumber(page)); + + XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen); + + START_CRIT_SECTION(); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE | info); + + PageSetLSN(page, recptr); + + END_CRIT_SECTION(); + } +} + +/** + * @brief Get the column widths in the worst cases. + * @param attr Attribute information of the columns. + * @return The width in the worst case. + */ +int16 +vci_GetColumnWorstSize(Form_pg_attribute attr) +{ + if (0 <= attr->attlen) /* fixed length data */ + return attr->attlen; + + /* variable or long length data */ + if (0 <= attr->atttypmod) + { + int32 columnSize; + + switch (attr->atttypid) + { + /* for bit(n), varbit(n). */ + case BITOID: + case VARBITOID: + columnSize = VARBITTOTALLEN(attr->atttypmod); + break; + + /* for numeric(p,q), retrun 'p'+LL . 
*/ + case NUMERICOID: + columnSize = (attr->atttypmod >> 16) + VARHDRSZ; + break; + + case BPCHAROID: + case VARCHAROID: + if (attr->atttypmod < VARHDRSZ) + columnSize = (attr->atttypmod - VARHDRSZ) * MAX_MULTIBYTE_CHAR_LEN + VARHDRSZ; + else + columnSize = attr->atttypmod * MAX_MULTIBYTE_CHAR_LEN; + break; + + default: + { +#ifdef VCI_USE_COMPACT_VARLENA + if (attr->atttypmod < VARATT_SHORT_MAX) + columnSize = attr->atttypmod - VARHDRSZ + VARHDRSZ_SHORT; + else + columnSize = attr->atttypmod; +#else + columnSize = attr->atttypmod; +#endif + } + break; + } + + if (columnSize < MaxHeapTupleSize) + return (int16) columnSize; + } + + /* worst size -> MaxHeapTupleSize(8k) */ + /* unlimited data size */ + return MaxHeapTupleSize; +} + +/** + * @brief Count number of nullable columns in a relation + * with tuple descriptor. + * @param[in] rel Relation + * @return Number of nullable columns in the relation. + */ +int +vci_GetNumberOfNullableColumn(Relation rel) +{ + int result = 0; + TupleDesc tupleDesc = RelationGetDescr(rel); + int aId; + + for (aId = 0; aId < tupleDesc->natts; ++aId) + { + Assert(!((tupleDesc->attrs)[aId]->attnotnull)); + ++result; + } + + return result; +} + +static Buffer +ReadBufferWithPageInitCore(Relation reln, BlockNumber blockNumber, int16 numItem) +{ + Buffer buffer; + Page page; + + Assert((reln->rd_rel->relkind == 'i') || (reln->rd_rel->relkind == 'v')); + buffer = ReadBuffer(reln, blockNumber); + + page = BufferGetPage(buffer); + if (PageIsNew(page)) + { + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + if (PageIsNew(page)) + InitPageCoreWithoutLock(buffer, numItem); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + + return buffer; +} + +/** + * @brief Read a buffer containing the requested block of the requested VCI + * relation. + * + * Same as ReadBuffer(), but initialize new page. + * + * We must generally use this function instead of ReadBuffer(), to access a kind + * of VCI relations except Data WOS, Whiteout WOS, and delete vector. 
But we
+ * don't need to replace ReadBuffer() immediately after vci_PreparePagesIfNecessaryCore().
+ *
+ * A page that is still new is initialized with one item.
+ *
+ * @param[in] reln The relation.
+ * @param[in] blockNumber The block number to be read.
+ * @return The buffer holding the requested block.
+ */
+Buffer
+vci_ReadBufferWithPageInit(Relation reln, BlockNumber blockNumber)
+{
+    return ReadBufferWithPageInitCore(reln, blockNumber, 1);
+}
diff --git a/contrib/vci/storage/vci_tidcrid.c b/contrib/vci/storage/vci_tidcrid.c
new file mode 100644
index 0000000..ab06eb1
--- /dev/null
+++ b/contrib/vci/storage/vci_tidcrid.c
@@ -0,0 +1,173 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_tidcrid.c
+ *
+ * Copyright (c) 2014-2016 FUJITSU LIMITED All rights reserved.
+ */
+
+#include "postgres.h"
+#include "vci.h"
+#include "vci_ros.h"
+#include "vci_tidcrid.h"
+#include "vci_columns.h"
+
+static void InitializeTidCridUpdateList(Oid relOid);
+static vcis_tidcrid_meta_t *vci_GetTidCridMeta(vci_TidCridRelations *relPair);
+static vcis_tidcrid_pagetag_t *vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk);
+
+/**
+ * function to cast from Page to (vcis_tidcrid_pair_list_t *).
+ */
+#define vci_GetTidCridPairListT(page) \
+    ((vcis_tidcrid_pair_list_t *) &((page)[VCI_MIN_PAGE_HEADER]))
+
+/* **************************************
+ * Initialize
+ * *************************************
+ */
+/*
+ * Prepare the header page of one TID-CRID update list relation and set the
+ * number of pairs stored in it to 0.
+ */
+static void
+InitializeTidCridUpdateList(Oid relOid)
+{
+    Relation rel = heap_open(relOid, ShareLock);
+    Buffer buffer;
+    Page page;
+    vcis_tidcrid_pair_list_t *pairList;
+    BlockNumber blockNumber = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID;
+
+    Assert(offsetof(vcis_tidcrid_pair_list_t, body) == VCI_TID_CRID_UPDATE_PAGE_SPACE);
+
+    /* make sure the header page exists before reading it */
+    vci_PreparePagesWithOneItemIfNecessary(rel, blockNumber);
+    buffer = ReadBuffer(rel, blockNumber);
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+    page = BufferGetPage(buffer);
+    pairList = vci_GetTidCridPairListT(page);
+    pairList->num = 0;
+
+    vci_WriteOneItemPage(rel, buffer);
+    UnlockReleaseBuffer(buffer);
+    heap_close(rel, ShareLock);
+}
+
+/*
+ * Initialize both TID-CRID update list relations whose OIDs are recorded in
+ * the main relation header (vcimrv_tid_crid_update_oid_0 / _1).
+ */
+void
+vci_InitializeTidCridUpdateLists(vci_MainRelHeaderInfo *info)
+{
+    Oid oid;
+
+    oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, 0);
+    InitializeTidCridUpdateList(oid);
+    oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_1, 0);
+    InitializeTidCridUpdateList(oid);
+}
+
+/*
+ * Format the first page of the TID-CRID meta relation and of the TID-CRID
+ * data relation and write their initial contents.
+ */
+void
+vci_InitializeTidCridTree(vci_MainRelHeaderInfo *info)
+{
+    LOCKMODE lockmode = ShareLock;
+
+    vci_TidCridRelations relPairData = {0};
+    vci_TidCridRelations *relPair = &relPairData;
+    vcis_tidcrid_meta_t *tidcridMeta;
+    vcis_tidcrid_pagetag_t *tidcridTag;
+    vci_OpenTidCridRelations(relPair, info, lockmode);
+
+    /* --- Meta --- */
+    vci_FormatPageWithOneItem(relPair->meta,
+                              VCI_TID_CRID_DATA_FIRST_PAGE_ID);
+    /* reads the meta page into relPair->bufMeta */
+    tidcridMeta = vci_GetTidCridMeta(relPair);
+    LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+    tidcridMeta->free_page_begin_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+    tidcridMeta->free_page_begin_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+    tidcridMeta->free_page_end_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+    tidcridMeta->free_page_end_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+    tidcridMeta->free_page_prev_id = InvalidBlockNumber;
+    tidcridMeta->free_page_next_id = InvalidBlockNumber;
+    tidcridMeta->num_free_pages = 1;
+    tidcridMeta->num_free_pages_old = 1;
+    tidcridMeta->num_free_page_blocks = 1;
+    tidcridMeta->num_free_page_blocks_old = 1;
+    tidcridMeta->num = 0;
+    tidcridMeta->num_old = 0;
+    tidcridMeta->free_block_number = 1;
+    tidcridMeta->offset = offsetof(vcis_tidcrid_meta_t, body);
+
+    /* need to set invalid to first item ? */
+    vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+    UnlockReleaseBuffer(relPair->bufMeta);
+
+    /* --- Data --- */
+    vci_FormatPageWithItems(relPair->data,
+                            VCI_TID_CRID_DATA_FIRST_PAGE_ID,
+                            VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE);
+    /* reads the data page into relPair->bufData */
+    tidcridTag = vci_GetTidCridTag(relPair, VCI_TID_CRID_DATA_FIRST_PAGE_ID);
+    LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+    tidcridTag->size = MaxBlockNumber;
+    tidcridTag->type = vcis_tidcrid_type_pagetag;
+    tidcridTag->prev_pos = InvalidBlockNumber;
+    tidcridTag->next_pos = InvalidBlockNumber;
+    tidcridTag->num = 0;
+    tidcridTag->free_size = VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - 1;
+    tidcridTag->bitmap = 0x1;
+    vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+    UnlockReleaseBuffer(relPair->bufData);
+    vci_CloseTidCridRelations(relPair, lockmode);
+}
+
+
+/* **************************************
+ * TID CRID Tree Functions
+ * *************************************
+ */
+/*
+ * Open the TID-CRID meta and data relations recorded in the main relation
+ * header with the given lock mode and remember info in rel.
+ */
+void
+vci_OpenTidCridRelations(vci_TidCridRelations *rel,
+                         vci_MainRelHeaderInfo *info,
+                         LOCKMODE lockmode)
+{
+    rel->meta = heap_open(vci_GetMainRelVar(info, vcimrv_tid_crid_meta_oid, 0), lockmode);
+    rel->data = heap_open(vci_GetMainRelVar(info, vcimrv_tid_crid_data_oid, 0), lockmode);
+
+    rel->info = info;
+}
+
+/*
+ * Close the relations opened by vci_OpenTidCridRelations().  A NULL rel and
+ * relations that are not valid are ignored.
+ */
+void
+vci_CloseTidCridRelations(vci_TidCridRelations *rel, LOCKMODE lockmode)
+{
+    if (rel)
+    {
+        if (RelationIsValid(rel->data))
+            heap_close(rel->data, lockmode);
+        if (RelationIsValid(rel->meta))
+            heap_close(rel->meta, lockmode);
+    }
+}
+
+/* cast from Page to (vcis_tidcrid_meta_t *), skipping VCI_MIN_PAGE_HEADER bytes */
+#define vci_GetTidCridMetaT(page) \
+    ((vcis_tidcrid_meta_t *)& ((page)[VCI_MIN_PAGE_HEADER]))
+
+/*
+ * Read the meta page into relPair->bufMeta and return a pointer to the
+ * meta structure inside it.  The caller releases relPair->bufMeta.
+ *
+ * NOTE(review): the page is read with VCI_COLUMN_META_HEADER_PAGE_ID while
+ * vci_InitializeTidCridTree() formats VCI_TID_CRID_DATA_FIRST_PAGE_ID -
+ * confirm both constants name the same block.
+ */
+static vcis_tidcrid_meta_t *
+vci_GetTidCridMeta(vci_TidCridRelations *relPair)
+{
+    Page page;
+
+    relPair->bufMeta = vci_ReadBufferWithPageInit(relPair->meta, VCI_COLUMN_META_HEADER_PAGE_ID);
+    page = BufferGetPage(relPair->bufMeta);
+
+    return vci_GetTidCridMetaT(page);
+}
+
+/*
+ * Read block blk of the data relation into relPair->bufData and return a
+ * pointer to the payload of item VCI_TID_CRID_PAGETAG_ITEM_ID, i.e. the
+ * bytes following its tuple header.  The caller releases relPair->bufData.
+ */
+static vcis_tidcrid_pagetag_t *
+vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk)
+{
+    Page page;
+    HeapTupleHeader htup;
+
+    relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, blk);
+    page = BufferGetPage(relPair->bufData);
+
+    htup = (HeapTupleHeader) PageGetItem(page,
+                                         PageGetItemId(page, VCI_TID_CRID_PAGETAG_ITEM_ID));
+
+    return (vcis_tidcrid_pagetag_t *) ((char *) htup + htup->t_hoff);
+}
+
+
+
diff --git a/contrib/vci/storage/vci_xact.c b/contrib/vci/storage/vci_xact.c
new file mode 100644
index 0000000..78dc3c4
--- /dev/null
+++ b/contrib/vci/storage/vci_xact.c
@@ -0,0 +1,39 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_xact.c
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/storage/vci_xact.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "access/xact.h"
+#include "storage/procarray.h"
+#include "vci.h"
+#include "vci_ros.h"
+#include "vci_xact.h"
+
+const int xid_shift_bits = 30;
+
+int64
+vci_GenerateXid64(TransactionId target_xid, vci_MainRelHeaderInfo *info)
+{
+    uint64 xid_gen;
+    TransactionId base_xid;
+    uint32 base_xid_upper_bits;
+    uint32 target_xid_upper_bits;
+    int32 diff;
+
+    xid_gen = (uint64) vci_GetMainRelVar(info, vcimrv_xid_generation, 0);
+    base_xid = vci_GetMainRelVar(info, vcimrv_xid_gen_udpate_xid, 0);
+
+    base_xid_upper_bits = ((uint32) base_xid) >> xid_shift_bits;
+    target_xid_upper_bits = ((uint32) target_xid) >> xid_shift_bits;
+
+    diff = (target_xid_upper_bits
- base_xid_upper_bits) << xid_shift_bits; + + return (int64) (((xid_gen + (diff >> xid_shift_bits)) << 32) | (uint64) target_xid); +} diff --git a/contrib/vci/vci--1.0.sql b/contrib/vci/vci--1.0.sql new file mode 100644 index 0000000..9faf61d --- /dev/null +++ b/contrib/vci/vci--1.0.sql @@ -0,0 +1,33 @@ +CREATE FUNCTION vci_handler(internal) +RETURNS index_am_handler +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE STRICT; + +CREATE ACCESS METHOD vci TYPE index HANDLER vci_handler; + +CREATE OPERATOR CLASS bool_ops DEFAULT FOR TYPE bool USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS bytea_ops DEFAULT FOR TYPE bytea USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS char_ops DEFAULT FOR TYPE "char" USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS name_ops DEFAULT FOR TYPE name USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS int8_ops DEFAULT FOR TYPE int8 USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS int2_ops DEFAULT FOR TYPE int2 USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS int4_ops DEFAULT FOR TYPE int4 USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS text_ops DEFAULT FOR TYPE text USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS float4_ops DEFAULT FOR TYPE float4 USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS float8_ops DEFAULT FOR TYPE float8 USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS abstime_ops DEFAULT FOR TYPE abstime USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS reltime_ops DEFAULT FOR TYPE reltime USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS tinterval_ops DEFAULT FOR TYPE tinterval USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS money_ops DEFAULT FOR TYPE money USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS bpchar_ops DEFAULT FOR TYPE bpchar USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS date_ops DEFAULT FOR TYPE date USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS time_ops DEFAULT FOR TYPE time USING vci AS OPERATOR 1 =; +CREATE OPERATOR CLASS timestamp_ops DEFAULT FOR TYPE timestamp USING vci AS OPERATOR 1 =; +CREATE 
/* contrib/vci/vci--1.0.sql */

-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION vci" to load this file. \quit

-- Handler function of the "vci" index access method; implemented in the
-- extension's shared library.
CREATE FUNCTION vci_handler(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT;

-- Register the access method itself.
CREATE ACCESS METHOD vci TYPE index HANDLER vci_handler;

-- Default operator classes, one per column type.  Each class declares only
-- the type's "=" operator, as strategy 1.
CREATE OPERATOR CLASS bool_ops DEFAULT FOR TYPE bool USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS bytea_ops DEFAULT FOR TYPE bytea USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS char_ops DEFAULT FOR TYPE "char" USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS name_ops DEFAULT FOR TYPE name USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS int8_ops DEFAULT FOR TYPE int8 USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS int2_ops DEFAULT FOR TYPE int2 USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS int4_ops DEFAULT FOR TYPE int4 USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS text_ops DEFAULT FOR TYPE text USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS float4_ops DEFAULT FOR TYPE float4 USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS float8_ops DEFAULT FOR TYPE float8 USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS abstime_ops DEFAULT FOR TYPE abstime USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS reltime_ops DEFAULT FOR TYPE reltime USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS tinterval_ops DEFAULT FOR TYPE tinterval USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS money_ops DEFAULT FOR TYPE money USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS bpchar_ops DEFAULT FOR TYPE bpchar USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS date_ops DEFAULT FOR TYPE date USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS time_ops DEFAULT FOR TYPE time USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS timestamp_ops DEFAULT FOR TYPE timestamp USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS timestamptz_ops DEFAULT FOR TYPE timestamptz USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS interval_ops DEFAULT FOR TYPE interval USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS timetz_ops DEFAULT FOR TYPE timetz USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS bit_ops DEFAULT FOR TYPE bit USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS varbit_ops DEFAULT FOR TYPE varbit USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS numeric_ops DEFAULT FOR TYPE numeric USING vci AS OPERATOR 1 =;
CREATE OPERATOR CLASS uuid_ops DEFAULT FOR TYPE uuid USING vci AS OPERATOR 1 =;
+ */ +vci_RebuildCommand vci_rebuild_command = vcirc_invalid; + +VciGucStruct VciGuc; + +/* saved hook value in case of unload */ +ProcessUtility_hook_type process_utility_prev = NULL; + +static struct config_bool VciConfigureNamesBool[] = +{ + { + { + "vci.enable", + PGC_USERSET, RESOURCES_MEM, + "Enables the VCI.", + NULL, + }, + &VciGuc.enable, + true, + NULL, NULL, NULL, + }, +}; + +extern void _PG_init(void); +static void vci_read_guc_variables(void); + +/* + * _PG_init: Entry point of this module. + * It is called when the module is loaded. + */ +void +_PG_init(void) +{ + if (!process_shared_preload_libraries_in_progress) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("\"%s\" must be registered in shared_preload_libraries", VCI_STRING))); + return; /* LCOV_EXCL_LINE */ + } + + vci_read_guc_variables(); + + if (!IsPostmasterEnvironment) + { + VciGuc.enable = 0; + } + + /* register process utilityhook */ + process_utility_prev = ProcessUtility_hook; + ProcessUtility_hook = vci_process_utility; + + /* register function to custom hook */ + add_index_delete_hook = vci_add_index_delete; + add_drop_relation_hook = vci_add_drop_relation; + add_skip_vci_index_hook = vci_add_skip_vci_index; + + /* If single user mode, not set environment for parallel. 
*/ + if (IsPostmasterEnvironment) + { + if (!IsUnderPostmaster) + { +#ifdef WIN32 + struct stat st; + char *dir_name = "base\\" PG_TEMP_FILES_DIR; + + if (stat(dir_name, &st) == 0) + { + if (!S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("\"%s\" is not directory", dir_name))); + } + else + { + if (errno == ENOENT) + { + if (mkdir(dir_name, S_IRWXU) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + dir_name))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + dir_name))); + } +#endif + + } + } +} + +/**/ +static void +vci_read_guc_variables(void) +{ + int i; + + + for (i = 0; i < (int) lengthof(VciConfigureNamesBool); i++) + { + struct config_bool *conf = &VciConfigureNamesBool[i]; + + if (IsPostmasterEnvironment) + DefineCustomBoolVariable(conf->gen.name, + conf->gen.short_desc, + conf->gen.long_desc, + conf->variable, + conf->boot_val, + conf->gen.context, + conf->gen.flags, + conf->check_hook, + conf->assign_hook, + conf->show_hook); + else + *(conf->variable) = conf->boot_val; + + } +} diff --git a/contrib/vci/vci_supported_funcs.sql b/contrib/vci/vci_supported_funcs.sql new file mode 100644 index 0000000..40da9f3 --- /dev/null +++ b/contrib/vci/vci_supported_funcs.sql @@ -0,0 +1,64 @@ +CREATE TEMPORARY TABLE test (funcoid oid); +INSERT INTO test (funcoid) SELECT unnest(ARRAY[aggfnoid, aggtransfn, aggfinalfn, aggmtransfn, aggminvtransfn, aggmfinalfn]) FROM pg_aggregate; +INSERT INTO test (funcoid) SELECT unnest(ARRAY[aminsert, ambeginscan, amgettuple, amgetbitmap, amrescan, amendscan, ammarkpos, amrestrpos, ambuild, ambuildempty, ambulkdelete, amvacuumcleanup, amcanreturn, amcostestimate, amoptions]) FROM pg_am; +INSERT INTO test (funcoid) SELECT unnest(ARRAY[amproc]) FROM pg_amproc; +INSERT INTO test (funcoid) SELECT unnest(ARRAY[castfunc]) FROM pg_cast; +INSERT INTO test (funcoid) SELECT unnest(ARRAY[conproc]) FROM 
-- contrib/vci/vci_supported_funcs.sql
--
-- Helper query: list built-in functions (oid < 16384) that are not
-- aggregates, window functions or set-returning, that are not referenced as
-- a support function by any system catalog row collected below, and whose
-- return and argument types all appear in safe_types.

-- Step 1: collect every function OID referenced from the system catalogs.
CREATE TEMPORARY TABLE test (funcoid oid);
-- Includes the combine/serial/deserial support functions of aggregates.
INSERT INTO test (funcoid) SELECT unnest(ARRAY[aggfnoid, aggtransfn, aggfinalfn, aggcombinefn, aggserialfn, aggdeserialfn, aggmtransfn, aggminvtransfn, aggmfinalfn]) FROM pg_aggregate;
-- pg_am no longer has per-callback columns (aminsert, ambeginscan, ...) on
-- servers that support CREATE ACCESS METHOD; the handler function is the
-- only function it references.
INSERT INTO test (funcoid) SELECT amhandler FROM pg_am;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[amproc]) FROM pg_amproc;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[castfunc]) FROM pg_cast;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[conproc]) FROM pg_conversion;
INSERT INTO test (funcoid) SELECT evtfoid FROM pg_event_trigger;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[fdwhandler, fdwvalidator]) FROM pg_foreign_data_wrapper;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[lanplcallfoid, laninline, lanvalidator]) FROM pg_language;
INSERT INTO test (funcoid) SELECT tgfoid FROM pg_trigger;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[oprcode, oprrest, oprjoin]) FROM pg_operator;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[rngcanonical, rngsubdiff]) FROM pg_range;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[prsstart, prstoken, prsend, prsheadline, prslextype]) FROM pg_ts_parser;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[tmplinit, tmpllexize]) FROM pg_ts_template;
INSERT INTO test (funcoid) SELECT unnest(ARRAY[typinput, typoutput, typreceive, typsend, typmodin, typmodout, typanalyze]) FROM pg_type;

-- Step 2: de-duplicate, dropping the "no function" entries (OID 0).
CREATE TABLE sys_func_table (funcoid oid UNIQUE);
INSERT INTO sys_func_table SELECT distinct funcoid FROM test WHERE funcoid > 0 ORDER BY funcoid;

-- Step 3: the types a candidate function may take or return.
CREATE TABLE safe_types (typeoid oid UNIQUE);
INSERT INTO safe_types (typeoid) VALUES
	(16), -- bool
	(17), -- bytea
	(18), -- char
	(19), -- name
	(20), -- int8
	(21), -- int2
	(23), -- int4
	(25), -- text
	(700), -- float4
	(701), -- float8
	(702), -- abstime
	(703), -- reltime
	(704), -- tinterval
	(790), -- money
	(1003), -- _name
	(1005), -- _int2
	(1007), -- _int4
	(1009), -- _text
	(1021), -- _float4
	(1042), -- bpchar
	(1082), -- date
	(1083), -- time
	(1114), -- timestamp
	(1184), -- timestamptz
	(1186), -- interval
	(1266), -- timetz
	(1560), -- bit
	(1700), -- numeric
	(2249), -- record
	(2276), -- any
	(2277), -- anyarray
	(2278), -- void
	(2283), -- anyelement
	(3500); -- anyenum

-- Turn an array of type OIDs into the array of their type names.
CREATE FUNCTION print_typename(IN oids _oid) RETURNS _name AS $$
	SELECT array_agg(pg_type.typname) FROM unnest(oids) AS t(i), pg_type WHERE i = pg_type.oid;
$$ LANGUAGE SQL;

-- Step 4: the report.  The type-name array lists the return type first,
-- followed by the argument types.
SELECT oid, proname, provolatile, prolang, print_typename(array_prepend(prorettype, proargtypes)) FROM pg_proc WHERE NOT proisagg AND NOT proiswindow AND NOT proretset
   AND NOT EXISTS (SELECT funcoid FROM sys_func_table WHERE pg_proc.oid = sys_func_table.funcoid)
   AND (SELECT bool_and(i IN (SELECT typeoid FROM safe_types)) FROM unnest(array_prepend(prorettype, proargtypes)) AS t(i))
   AND oid < 16384 ORDER BY oid;
ABSTIMEOID */ + { 703, "reltime", true }, /* RELTIMEOID */ + { 704, "tinterval", true }, /* TINTERVALOID */ + { 705, "unknown", false}, /* UNKNOWNOID */ + { 718, "circle", false}, /* CIRCLEOID */ + { 790, "money", true }, /* CASHOID */ + { 829, "macaddr", false}, /* MACADDROID */ + { 869, "inet", false}, /* INETOID */ + {1033, "aclitem", false}, /* ACLITEMOID */ + {1042, "bpchar", true }, /* BPCHAROID */ + {1043, "varchar", true }, /* VARCHAROID */ + {1082, "date", true }, /* DATEOID */ + {1083, "time", true }, /* TIMEOID */ + {1114, "timestamp", true }, /* TIMESTAMPOID */ + {1184, "timestamptz", true }, /* TIMESTAMPTZOID */ + {1186, "interval", true }, /* INTERVALOID */ + {1266, "timetz", true }, /* TIMETZOID */ + {1560, "bit", true }, /* BITOID */ + {1562, "varbit", true }, /* VARBITOID */ + {1700, "numeric", true }, /* NUMERICOID */ + {1790, "refcursor", false}, /* REFCURSOROID */ + {2202, "regprocedure", false}, /* REGPROCEDUREOID */ + {2203, "regoper", false}, /* REGOPEROID */ + {2204, "regoperator", false}, /* REGOPERATOROID */ + {2205, "regclass", false}, /* REGCLASSOID */ + {2206, "regtype", false}, /* REGTYPEOID */ + {2249, "record", false}, /* RECORDOID */ + {2275, "cstring", false}, /* CSTRINGOID */ + {2276, "any", false}, /* ANYOID */ + {2277, "anyarray", false}, /* ANYARRAYOID */ + {2278, "void", false}, /* VOIDOID */ + {2279, "trigger", false}, /* TRIGGEROID */ + {2280, "language_handler", false}, /* LANGUAGE_HANDLEROID */ + {2281, "internal", false}, /* INTERNALOID */ + {2282, "opaque", false}, /* OPAQUEOID */ + {2283, "anyelement", false}, /* ANYELEMENTOID */ + {2776, "anynonarray", false}, /* ANYNONARRAYOID */ + {2950, "uuid", true }, /* UUIDOID */ + {2970, "txid_snapshot", false}, + {3115, "fdw_handler", false}, /* FDW_HANDLEROID */ + {3220, "pg_lsn", false}, /* LSNOID */ + {3500, "anyenum", false}, /* ANYENUMOID */ + {3614, "tsvector", false}, /* TSVECTOROID */ + {3615, "tsquery", false}, /* TSQUERYOID */ + {3642, "gtsvector", false}, /* 
GTSVECTOROID */ + {3734, "regconfig", false}, /* REGCONFIGOID */ + {3769, "regdictionary", false}, /* REGDICTIONARYOID */ + {3802, "jsonb", false}, /* JSONBOID */ + {3831, "anyrange", false}, /* ANYRANGEOID */ + {3838, "event_trigger", false}, /* EVTTRIGGEROID */ + {3904, "int4range", false}, /* INT4RANGEOID */ + {3906, "numrange", false}, + {3908, "tsrange", false}, + {3910, "tstzrange", false}, + {3912, "daterange", false}, + {3926, "int8range", false}, +}; + +bool +vci_is_supported_type(Oid oid) +{ + int min, max, pivot; + + if ((oid < VCI_SUPPORTED_TYPE_MIN) || (VCI_SUPPORTED_TYPE_MAX < oid)) + return false; + + + min = 0; + max = lengthof(vci_supported_type_table); /* exclusive */ + + while (max - min > 1) + { + Oid comp; + + pivot = (min + max) / 2; + + comp = vci_supported_type_table[pivot].oid; + + if (comp == oid) + return vci_supported_type_table[pivot].is_support; + else if (oid < comp) + max = pivot; + else /* comp < oid */ + min = pivot; + } + + if (max - min == 1) + if (oid == vci_supported_type_table[min].oid) + return vci_supported_type_table[min].is_support; + + return false; +} -- 2.7.4.windows.1