diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 011f2b9..a74081b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1125,6 +1125,7 @@ relation_open(Oid relationId, LOCKMODE lockmode) r = RelationIdGetRelation(relationId); if (!RelationIsValid(r)) + elog(ERROR, "could not open relation with OID %u", relationId); /* Make note that we've accessed a temporary relation */ diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 43b8924..8852b9c 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -48,6 +48,7 @@ #include "utils/rel.h" #include "utils/ruleutils.h" #include "utils/syscache.h" +#include "utils/typcache.h" /* * Information about bounds of a partitioned relation @@ -89,6 +90,7 @@ typedef struct PartitionBoundInfoData * partitioned table) */ int null_index; /* Index of the null-accepting partition; -1 * if there isn't one */ + int serial; } PartitionBoundInfoData; #define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1) @@ -114,6 +116,12 @@ typedef struct PartitionRangeBound bool lower; /* this is the lower (vs upper) bound */ } PartitionRangeBound; +/* Internal representation of a hash partition bound */ +typedef struct PartitionHashBound +{ + int serial; /* hash partition serial number */ +} PartitionHashBound; + static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg); static int32 qsort_partition_rbound_cmp(const void *a, const void *b, @@ -149,6 +157,49 @@ static int partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, void *probe, bool probe_is_bound, bool *is_equal); +/* check if the relation is a hash partition */ +bool is_hash_partition(Oid relid) +{ + return is_hash_partitioned(get_partition_parent(relid)); +} + +/* check if the relation is hash partitioned */ +bool is_hash_partitioned(Oid relid) +{ + Relation rel; + bool res = false; + + rel = heap_open(relid, NoLock); + if (rel->rd_partkey) + if (rel->rd_partkey->strategy == 'h') + res = true; + + heap_close(rel, NoLock); + + return res; +} + +/* + * HashSerialGetPartition + * get hash partition oid according to its index +*/ +Oid +HashSerialGetPartition(Oid parentOid, int index) +{ + Relation parent; + PartitionDesc pdesc; + Oid child; + + parent = heap_open(parentOid, AccessShareLock); + pdesc = RelationGetPartitionDesc(parent); + + child = pdesc->oids[index]; + + heap_close(parent, AccessShareLock); + + return child; +} + /* * RelationBuildPartitionDesc * Form rel's partition descriptor @@ -179,6 +230,9 @@ RelationBuildPartitionDesc(Relation rel) /* Range partitioning specific */ PartitionRangeBound **rbounds = NULL; + /* Hash partitiNG specific */ + PartitionHashBound **hashserials = NULL; + /* * The following could happen in situations where rel has a pg_class entry * but not the pg_partitioned_table entry yet. @@ -310,6 +364,10 @@ RelationBuildPartitionDesc(Relation rel) qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), qsort_partition_list_value_cmp, (void *) key); } + else if (key->strategy == PARTITION_STRATEGY_HASH) + { + + } else if (key->strategy == PARTITION_STRATEGY_RANGE) { int j, @@ -511,6 +569,15 @@ RelationBuildPartitionDesc(Relation rel) break; } + case PARTITION_STRATEGY_HASH: + { + for (i = 0; i < nparts; i++) + { + mapping[i] = next_index++; + } + break; + } + case PARTITION_STRATEGY_RANGE: { boundinfo->content = (RangeDatumContent **) palloc(ndatums * @@ -723,6 +790,13 @@ check_new_partition_bound(char *relname, Relation parent, break; } + case PARTITION_STRATEGY_HASH: + { + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + + break; + } + case PARTITION_STRATEGY_RANGE: { PartitionRangeBound *lower, @@ -898,6 +972,11 @@ get_qual_from_partbound(Relation rel, Relation parent, my_qual = get_qual_for_range(key, spec); break; + case PARTITION_STRATEGY_HASH: + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + my_qual = get_qual_for_range(key, spec); + break; + default: elog(ERROR, "unexpected partition strategy: %d", (int) key->strategy); @@ -1039,6 +1118,7 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode, /* Root partitioned table has no parent, so NULL for parent */ parted_rel_parents = list_make1(NULL); APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents); + forboth(lc1, all_parts, lc2, all_parents) { Relation partrel = heap_open(lfirst_oid(lc1), lockmode); @@ -1909,7 +1989,7 @@ FormPartitionKeyDatum(PartitionDispatch pd, if (partexpr_item != NULL) elog(ERROR, "wrong number of partition key expressions"); } - + /* * get_partition_for_tuple * Finds a leaf partition for tuple contained in *slot @@ -1935,6 +2015,7 @@ get_partition_for_tuple(PartitionDispatch *pd, result; ExprContext *ecxt = GetPerTupleExprContext(estate); TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple; + int nparts; /* start with the root partitioned table */ parent = pd[0]; @@ -2016,6 +2097,12 @@ get_partition_for_tuple(PartitionDispatch *pd, cur_index = -1; break; + case PARTITION_STRATEGY_HASH: + Assert(partdesc->nparts > 0); + nparts = partdesc->nparts; + cur_index = DatumGetUInt32(OidFunctionCall1(lookup_type_cache(key->parttypid[0], TYPECACHE_HASH_PROC)->hash_proc, values[0])) % nparts; + + break; case PARTITION_STRATEGY_RANGE: /* @@ -2257,6 +2344,13 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, *(Datum *) probe)); break; + case PARTITION_STRATEGY_HASH: + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + bound_datums[0], + *(Datum *) probe)); + break; + case PARTITION_STRATEGY_RANGE: { RangeDatumContent *content = boundinfo->content[offset]; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index bb00858..4e00a60 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -475,6 +475,11 @@ static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd); static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name); +static void ReInsertHashPartition(Relation rel, Oid parentId, Node *bound); + +static void InsertIntoTable(Oid relid, HeapTuple tuple, TupleDesc tupdesc); +static TupleTableSlot *ExecInsertPartition(TupleTableSlot *slot, TupleTableSlot *planSlot, + EState *estate, bool canSetTag); /* ---------------------------------------------------------------- * DefineRelation @@ -765,6 +770,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, /* Process and store partition bound, if any. */ if (stmt->partbound) { + PartitionBoundSpec *bound; ParseState *pstate; Oid parentId = linitial_oid(inheritOids); @@ -799,6 +805,9 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, /* Update the pg_class entry. */ StorePartitionBound(rel, parent, bound); + /*judge if create new hash partition, if so, re_insert data into all partitions*/ + ReInsertHashPartition(rel, parentId, (Node *)bound); + heap_close(parent, NoLock); /* @@ -872,6 +881,210 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, return address; } +/* + * ReInsertHashPartition + * when create a new hash partition, re_insert data into all partitions + */ +static void +ReInsertHashPartition(Relation rel, Oid parentId, Node *bound) +{ + Relation parent; + PartitionKey key; + PartitionDesc pdesc; + int nparts; + + parent = heap_open(parentId, ExclusiveLock); + key = RelationGetPartitionKey(parent); + pdesc = RelationGetPartitionDesc(parent); + + nparts = pdesc->nparts + 1; + + if (key->strategy == PARTITION_STRATEGY_HASH) + { + int i; + Oid *children = pdesc->oids; + + /*get data from every partition and re_insert*/ + for (i = 0; i < pdesc->nparts; i++) + { + Oid child = children[i]; + Relation hash_rel; + HeapScanDesc hash_scan; + HeapTuple hash_tup; + Snapshot snapshot; + Datum val; + bool isnull; + int index; + Oid insertRel; + + hash_rel = heap_open(child, ExclusiveLock); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + hash_scan = heap_beginscan(hash_rel, snapshot, 0, NULL); + + /*only partition table has tuples then re_insert*/ + while ((hash_tup = heap_getnext(hash_scan, ForwardScanDirection)) != NULL) + { + /*get the key's data of the tuple*/ + val = fastgetattr(hash_tup, key->partattrs[0], hash_rel->rd_att, &isnull); + + /*get the partition's index of the key*/ + index = DatumGetUInt32(OidFunctionCall1(lookup_type_cache(key->parttypid[0], TYPECACHE_HASH_PROC)->hash_proc, val)) % nparts; + + /*get partition's oid that match the key's data*/ + insertRel = HashSerialGetPartition(parentId, index); + + if (index == nparts - 1) + { + InsertIntoTable(RelationGetRelid(rel), hash_tup, parent->rd_att); + simple_heap_delete(hash_rel, &hash_tup->t_self); + } + else + { + InsertIntoTable(insertRel, hash_tup, parent->rd_att); + simple_heap_delete(hash_rel, &hash_tup->t_self); + } + } + + heap_endscan(hash_scan); + UnregisterSnapshot(snapshot); + heap_close(hash_rel, ExclusiveLock); + } + } + + heap_close(parent, ExclusiveLock); +} + +/* + * InsertIntoTable + * Find a table relation given it's name and insert the + * tuple on it (updating the indexes and calling the triggers) + */ +static void +InsertIntoTable(Oid relid, HeapTuple tuple, TupleDesc tupdesc) +{ + ResultRelInfo *resultRelInfo; + TupleTableSlot *slot; + EState *estate = CreateExecutorState(); + Relation relation; + + /* Lookup the relation */ + if (relid == InvalidOid) + elog(ERROR, "partition_insert_trigger: Invalid child table %s", get_rel_name(relid)); + relation = RelationIdGetRelation(relid); + if (relation == NULL) + elog(ERROR, "partition_insert_trigger: Failed to locate relation for child table %s", get_rel_name(relid)); + + /* + * We need a ResultRelInfo so we can use the regular executor's + * index-entry-making machinery. + */ + resultRelInfo = makeNode(ResultRelInfo); + resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ + resultRelInfo->ri_RelationDesc = relation; + resultRelInfo->ri_TrigDesc = CopyTriggerDesc(relation->trigdesc); + if (resultRelInfo->ri_TrigDesc) + { + resultRelInfo->ri_TrigFunctions = (FmgrInfo *) + palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo)); + resultRelInfo->ri_TrigWhenExprs = (List **) + palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(List *)); + } + resultRelInfo->ri_TrigInstrument = NULL; + ExecOpenIndices(resultRelInfo, false); + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + /*added for GetModifiedColumns in ExecConstraints functions*/ + estate->es_range_table = list_make1(relation); + + /* Set up a tuple slot too */ + slot = ExecInitExtraTupleSlot(estate); + ExecSetSlotDescriptor(slot, tupdesc); + /* Triggers might need a slot as well */ + estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate); + + ExecStoreTuple(tuple, slot, InvalidBuffer, false); + ExecInsertPartition(slot, slot, estate, false); + + /* Free resources */ + ExecResetTupleTable(estate->es_tupleTable, false); + ExecCloseIndices(resultRelInfo); + FreeExecutorState(estate); + RelationClose(relation); +} + +/* + * ExecInsertPartition: + * For INSERT, we have to insert the tuple into the target relation + * and insert appropriate tuples into the index relations. + */ +static TupleTableSlot * +ExecInsertPartition(TupleTableSlot *slot, TupleTableSlot *planSlot, EState *estate, bool canSetTag) +{ + HeapTuple tuple; + ResultRelInfo *resultRelInfo; + Relation resultRelationDesc; + Oid newId; + List *recheckIndexes = NIL; + + /* + * get the heap tuple out of the tuple table slot, making sure we have a + * writable copy + */ + tuple = ExecMaterializeSlot(slot); + + /* get information on the (current) result relation*/ + resultRelInfo = estate->es_result_relation_info; + resultRelationDesc = resultRelInfo->ri_RelationDesc; + + if (resultRelationDesc->rd_rel->relhasoids) + HeapTupleSetOid(tuple, InvalidOid); + + /* BEFORE ROW INSERT Triggers */ + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_insert_before_row) + { + slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); + if (slot == NULL) + return NULL; + /* trigger might have changed tuple */ + tuple = ExecMaterializeSlot(slot); + } + + /* INSTEAD OF ROW INSERT Triggers */ + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_insert_instead_row) + { + slot = ExecIRInsertTriggers(estate, resultRelInfo, slot); + if (slot == NULL) + return NULL; + /* trigger might have changed tuple */ + tuple = ExecMaterializeSlot(slot); + newId = InvalidOid; + } + else + { + /* Check the constraints of the tuple*/ + if (resultRelationDesc->rd_att->constr) + ExecConstraints(resultRelInfo, slot, estate); + newId = heap_insert(resultRelationDesc, tuple, + estate->es_output_cid, 0, NULL); + } + + if (canSetTag) + { + (estate->es_processed)++; + estate->es_lastoid = newId; + setLastTid(&(tuple->t_self)); + } + + /* AFTER ROW INSERT Triggers */ + ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes, NULL); + list_free(recheckIndexes); + + return NULL; +} + /* * Emit the right error or warning message for a "DROP" command issued on a * non-existent relation @@ -1166,6 +1379,15 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, LockRelationOid(state->heapOid, heap_lockmode); } + if (is_partition && relOid != oldRelOid) + { + if(is_hash_partition(relOid)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("hash partition \"%s\" can not be dropped", + get_rel_name(relOid)))); + } + /* * Similarly, if the relation is a partition, we must acquire lock on its * parent before locking the partition. That's because queries lock the @@ -1745,6 +1967,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence, * exclusive lock on the parent because its partition descriptor will * be changed by addition of the new partition. */ + if (!is_partition) relation = heap_openrv(parent, ShareUpdateExclusiveLock); else @@ -13146,18 +13369,29 @@ transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy) ParseState *pstate; RangeTblEntry *rte; ListCell *l; - + newspec = makeNode(PartitionSpec); newspec->strategy = partspec->strategy; newspec->partParams = NIL; newspec->location = partspec->location; - + /* Parse partitioning strategy name */ if (pg_strcasecmp(partspec->strategy, "list") == 0) *strategy = PARTITION_STRATEGY_LIST; else if (pg_strcasecmp(partspec->strategy, "range") == 0) *strategy = PARTITION_STRATEGY_RANGE; + else if (pg_strcasecmp(partspec->strategy, "hash") == 0) + { + if (list_length(partspec->partParams) == 1) + { + *strategy = PARTITION_STRATEGY_HASH; + } + else + { + elog(ERROR, "The hash partition does not support multiple key values."); + } + } else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -13434,6 +13668,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) ObjectAddress address; const char *trigger_name; + /*hash partition do not support attach operation*/ + if (rel->rd_partkey) + if (rel->rd_partkey->strategy == 'h') + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash partition do not support attach operation"))); + attachRel = heap_openrv(cmd->name, AccessExclusiveLock); /* @@ -13771,6 +14012,13 @@ ATExecDetachPartition(Relation rel, RangeVar *name) new_repl[Natts_pg_class]; ObjectAddress address; + /*hash partition do not support detach operation*/ + if (rel->rd_partkey) + if (rel->rd_partkey->strategy == 'h') + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash partition do not support detach operation"))); + partRel = heap_openrv(name, AccessShareLock); /* All inheritance related checks are performed within the function */ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0f08283..efd66bc 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -3228,6 +3228,7 @@ ExecSetupPartitionTupleRouting(Relation rel, /* Get the tuple-routing information and lock partitions */ *pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted, &leaf_parts); + *num_partitions = list_length(leaf_parts); *partitions = (ResultRelInfo *) palloc(*num_partitions * sizeof(ResultRelInfo)); @@ -3317,6 +3318,7 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, result = get_partition_for_tuple(pd, slot, estate, &failed_at, &failed_slot); + if (result < 0) { Relation failed_rel; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 8d17425..271b0f6 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1919,6 +1919,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) &partition_tupconv_maps, &partition_tuple_slot, &num_parted, &num_partitions); + mtstate->mt_partition_dispatch_info = partition_dispatch_info; mtstate->mt_num_dispatch = num_parted; mtstate->mt_partitions = partitions; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 67ac814..b86bfe6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -24,6 +24,7 @@ #include "miscadmin.h" #include "nodes/extensible.h" +#include "nodes/parsenodes.h" #include "nodes/plannodes.h" #include "nodes/relation.h" #include "utils/datum.h" @@ -4448,6 +4449,7 @@ _copyPartitionBoundSpec(const PartitionBoundSpec *from) COPY_NODE_FIELD(listdatums); COPY_NODE_FIELD(lowerdatums); COPY_NODE_FIELD(upperdatums); + COPY_SCALAR_FIELD(hashnumber); COPY_LOCATION_FIELD(location); return newnode; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b0abe9e..73d09cc 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3565,6 +3565,7 @@ _outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node) WRITE_NODE_FIELD(listdatums); WRITE_NODE_FIELD(lowerdatums); WRITE_NODE_FIELD(upperdatums); + WRITE_INT_FIELD(hashnumber); WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 1380703..74391a7 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2377,6 +2377,7 @@ _readPartitionBoundSpec(void) READ_NODE_FIELD(listdatums); READ_NODE_FIELD(lowerdatums); READ_NODE_FIELD(upperdatums); + READ_INT_FIELD(hashnumber); READ_LOCATION_FIELD(location); READ_DONE(); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index dc0b0b0..6333e43 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -28,7 +28,8 @@ #include "catalog/dependency.h" #include "catalog/heap.h" #include "catalog/partition.h" -#include "catalog/pg_am.h" +#include "catalog/pg_am.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -48,7 +49,7 @@ #include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" - +#include "utils/typcache.h" /* GUC parameter */ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; @@ -69,6 +70,10 @@ static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); static List *get_relation_statistics(RelOptInfo *rel, Relation relation); +static int get_hash_part_attrs(Oid parentid); +static char get_hash_part_strategy(Oid parentid); +static int get_hash_part_number(Oid parentid); + /* * get_relation_info - * Retrieves catalog information for a given relation. @@ -1441,9 +1446,376 @@ relation_excluded_by_constraints(PlannerInfo *root, if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo, false)) return true; + /* hash partition constraint exclusion. */ + if (NIL != root->append_rel_list) + { + Node *parent = NULL; + parent = (Node*)linitial(root->append_rel_list); + + if ((nodeTag(parent) == T_AppendRelInfo) + && get_hash_part_strategy(((AppendRelInfo*)parent)->parent_reloid) == PARTITION_STRATEGY_HASH + && (root->parse->jointree->quals != NULL)) + { + int cur_index = -1, in_index = -1, bool_index = -1; + Oid poid; + Relation relation; + PartitionKey key; + RelabelType *rt; + + poid = ((AppendRelInfo*)parent)->parent_reloid; + relation = RelationIdGetRelation(poid); + key = RelationGetPartitionKey(relation); + + heap_close(relation, NoLock); + + BoolExpr *boolexpr; + OpExpr *subopexpr; + Node *leftSubOp, *rightSubOp; + Node *constExprInSub, *varExprInSub; + Const *subc; + Var *subv; + int subpartattr; + int substrat; + TypeCacheEntry *subtce; + + OpExpr *predicateExpr; + Node *predicate; + List *list; + Node *leftPredicateOp, *rightPredicateOp; + Node *constExprInPredicate, *varExprInPredicate; + Const *c; + Var *v; + int partattr; + int strat; + TypeCacheEntry *tce; + + ScalarArrayOpExpr *arrayexpr; + Node *arraynode; + + predicate = rel->baserestrictinfo; + + list = (List *)predicate; + + if (list_length(list) != 1) + return false; + + predicate = linitial(list); + if (IsA(predicate, RestrictInfo)) + { + RestrictInfo *info = (RestrictInfo*)predicate; + predicate = (Node*)info->clause; + } + + switch (nodeTag(predicate)) + { + /* AND, OR, NOT expressions */ + case T_BoolExpr: + + boolexpr = (BoolExpr *) predicate; + bool subflag = true; + + foreach (lc, boolexpr->args) + { + Expr *arg = (Expr *) lfirst(lc); + OpExpr *oparg = (OpExpr*)lfirst(lc); + + leftSubOp = get_leftop((Expr*)arg); + rightSubOp = get_rightop((Expr*)arg); + int *subidx; + + /* check if one operand is a constant */ + if (IsA(rightSubOp, Const)) + { + varExprInSub = leftSubOp; + constExprInSub = rightSubOp; + } + else if (IsA(leftSubOp, Const)) + { + constExprInSub = leftSubOp; + varExprInSub = rightSubOp; + } + else + { + return false; + } + + subtce = lookup_type_cache(key->parttypid[0], TYPECACHE_BTREE_OPFAMILY); + substrat = get_op_opfamily_strategy(oparg->opno, subtce->btree_opf); + + subpartattr = get_hash_part_attrs(poid); + subv = (Var*)varExprInSub; + + switch (boolexpr->boolop) + { + case OR_EXPR: + if (subv->varattno == subpartattr && substrat == BTEqualStrategyNumber) + { + subc = (Const*)constExprInSub; + bool_index = DatumGetUInt32(OidFunctionCall1( + lookup_type_cache(key->parttypid[0], TYPECACHE_HASH_PROC)->hash_proc, + subc->constvalue)) + % list_length(root->append_rel_list); + + subidx = &bool_index; + if (subidx != NULL) + { + if (get_hash_part_number(rte->relid) == *subidx) + { + subflag = false; + } + + } + } + else + { + /* constraint exclusion in hash partition could not support <, > etc. */ + return false; + } + + break; + + case AND_EXPR: + + break; + + default: + + break; + } + } + + if (subflag) + return true; + else + return false; + + break; + + /* =, !=, <, > etc. */ + case T_OpExpr: + predicateExpr = (OpExpr *)predicate; + leftPredicateOp = get_leftop((Expr*)predicate); + rightPredicateOp = get_rightop((Expr*)predicate); + + /* check if one operand is a constant */ + if (IsA(rightPredicateOp, Const)) + { + varExprInPredicate = leftPredicateOp; + constExprInPredicate = rightPredicateOp; + } + else if (IsA(leftPredicateOp, Const)) + { + constExprInPredicate = leftPredicateOp; + varExprInPredicate = rightPredicateOp; + } + else + { + return false; + } + + tce = lookup_type_cache(key->parttypid[0], TYPECACHE_BTREE_OPFAMILY); + strat = get_op_opfamily_strategy(predicateExpr->opno, tce->btree_opf); + + partattr = get_hash_part_attrs(poid); + v = (Var*)varExprInPredicate; + + /* If strat is "=", select one partiton */ + if (v->varattno == partattr && strat == BTEqualStrategyNumber) + { + c = (Const*)constExprInPredicate; + cur_index = DatumGetUInt32(OidFunctionCall1( + lookup_type_cache(key->parttypid[0], TYPECACHE_HASH_PROC)->hash_proc, + c->constvalue)) + % list_length(root->append_rel_list); + } + else + { + /* constraint exclusion in hash partition could not support <, > etc. */ + return false; + } + + if (get_hash_part_number(rte->relid) != cur_index && cur_index != -1) + { + return true; + } + + break; + + /* IN expression */ + case T_ScalarArrayOpExpr: + arrayexpr = (ScalarArrayOpExpr *)predicate; + arraynode = (Node *) lsecond(arrayexpr->args); + bool flag = true; + + if (arraynode && IsA(arraynode, Const) && + !((Const *) arraynode)->constisnull) + { + ArrayType *arrayval; + int16 elemlen; + bool elembyval; + char elemalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + int strategy = BTEqualStrategyNumber; + int i; + int *idx; + + + /* Extract values from array */ + arrayval = DatumGetArrayTypeP(((Const *) arraynode)->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elemlen, &elembyval, &elemalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elemlen, elembyval, elemalign, + &elem_values, &elem_nulls, &num_elems); + + /* Construct OIDs list */ + for (i = 0; i < num_elems; i++) + { + if (!elem_nulls[i]) + { + /* Invoke base hash function for value type */ + in_index = DatumGetUInt32(OidFunctionCall1( + lookup_type_cache(key->parttypid[0], TYPECACHE_HASH_PROC)->hash_proc, + elem_values[i])) + % list_length(root->append_rel_list); + idx = &in_index; + + if (idx != NULL) + { + if (get_hash_part_number(rte->relid) == *idx) + { + flag = false; + } + + } + } + } + + /* Free resources */ + pfree(elem_values); + pfree(elem_nulls); + } + + if (flag) + return true; + else + return false; + + break; + + default: + elog(ERROR, "Unknown clause type."); + } + + } + + return false; + } + return false; } +/* + * get_hash_part_attrs + * + */ +int +get_hash_part_attrs(Oid parentid) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + int result; + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(parentid)); + + form = (Form_pg_partitioned_table)GETSTRUCT(tuple); + result = form->partattrs.values[0]; + + ReleaseSysCache(tuple); + + return result; +} + +/* + * get_hash_part_number + * + */ +int +get_hash_part_number(Oid parentid) +{ + HeapTuple tuple; + int result; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(parentid)); + + Datum boundDatum; + bool isnull; + PartitionBoundSpec *bound; + + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + + ReleaseSysCache(tuple); + + if (boundDatum) + { + bound = (PartitionBoundSpec *)(stringToNode(TextDatumGetCString(boundDatum))); + return bound->hashnumber; + } + + return result; +} + +/* + * get_hash_part_strategy + * + */ +char +get_hash_part_strategy(Oid parentid) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + char result; + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(parentid)); + if (!HeapTupleIsValid(tuple)) + { + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(parentid)); + + if (!HeapTupleIsValid(tuple)) + { + return '0'; + } + else + { + Datum boundDatum; + bool isnull; + PartitionBoundSpec *bound; + + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + + ReleaseSysCache(tuple); + + if (boundDatum) + { + bound = (PartitionBoundSpec *)(stringToNode(TextDatumGetCString(boundDatum))); + return bound->strategy; + } + return '0'; + } + } + form = (Form_pg_partitioned_table)GETSTRUCT(tuple); + result = form->partstrat; + ReleaseSysCache(tuple); + + return result; +} /* * build_physical_tlist diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0f3998f..1575cbe 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2675,6 +2675,17 @@ ForValues: $$ = n; } + /* a HASH partition */ + | /*EMPTY*/ + { + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_HASH; + //n->hashnumber = 1; + //n->location = @3; + + $$ = n; + } ; partbound_datum: diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index ee5f3a3..1742fb3 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -33,6 +33,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -3299,6 +3300,8 @@ transformPartitionBound(ParseState *pstate, Relation parent, char strategy = get_partition_strategy(key); int partnatts = get_partition_natts(key); List *partexprs = get_partition_exprs(key); + PartitionDesc pdesc; + pdesc = RelationGetPartitionDesc(parent); /* Avoid scribbling on input */ result_spec = copyObject(spec); @@ -3359,6 +3362,28 @@ transformPartitionBound(ParseState *pstate, Relation parent, value); } } + else if (strategy == PARTITION_STRATEGY_HASH) + { + char *colname; + + if (spec->strategy != PARTITION_STRATEGY_HASH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a hash partition"), + parser_errposition(pstate, exprLocation((Node *) spec)))); + + /* Get the only column's name in case we need to output an error */ + if (key->partattrs[0] != 0) + colname = get_relid_attribute_name(RelationGetRelid(parent), + key->partattrs[0]); + else + colname = deparse_expression((Node *) linitial(partexprs), + deparse_context_for(RelationGetRelationName(parent), + RelationGetRelid(parent)), + false, false); + + result_spec->hashnumber = pdesc->nparts; + } else if (strategy == PARTITION_STRATEGY_RANGE) { ListCell *cell1, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 18d9e27..19d216d 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1660,6 +1660,10 @@ pg_get_partkeydef_worker(Oid relid, int prettyFlags, if (!attrsOnly) appendStringInfo(&buf, "RANGE"); break; + case PARTITION_STRATEGY_HASH: + if (!attrsOnly) + appendStringInfo(&buf, "HASH"); + break; default: elog(ERROR, "unexpected partition strategy: %d", (int) form->partstrat); @@ -8666,6 +8670,13 @@ get_rule_expr(Node *node, deparse_context *context, appendStringInfoString(buf, ")"); break; + case PARTITION_STRATEGY_HASH: + Assert(spec->hashnumber != -1); + + appendStringInfoString(buf, "SERIAL NUMBER"); + appendStringInfo(buf, " %d", spec->hashnumber); + break; + case PARTITION_STRATEGY_RANGE: Assert(spec->lowerdatums != NIL && spec->upperdatums != NIL && diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 502d5eb..4d71c21 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -15340,12 +15340,33 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) appendPQExpBufferStr(q, " (\n)"); } - if (tbinfo->ispartition && !dopt->binary_upgrade) + if(tbinfo->ispartition && !dopt->binary_upgrade) { - appendPQExpBufferStr(q, "\n"); - appendPQExpBufferStr(q, tbinfo->partbound); + PGresult *res; + PQExpBuffer query = createPQExpBuffer(); + char *strategy; + char *s = "h"; + + appendPQExpBuffer(query, " select partstrat" + " from pg_partitioned_table p" + " join pg_inherits i on i.inhparent = p.partrelid" + " where i.inhrelid = %d", tbinfo->dobj.catId.oid); + + res = ExecuteSqlQueryForSingleRow(fout, query->data); + strategy = pg_strdup(PQgetvalue(res, 0, 0)); + + if(!(strcmp(strategy, s) == 0)) + { + appendPQExpBufferStr(q, "\n"); + appendPQExpBufferStr(q, tbinfo->partbound); + } + + PQclear(res); + destroyPQExpBuffer(query); } + + /* Emit the INHERITS clause, except if this is a partition. */ if (numParents > 0 && !tbinfo->ispartition && diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index f10879a..ecdd757 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -70,6 +70,10 @@ typedef struct PartitionDispatchData typedef struct PartitionDispatchData *PartitionDispatch; +extern bool is_hash_partitioned(Oid relid); +extern bool is_hash_partition(Oid relid); +extern Oid HashSerialGetPartition(Oid parentOid, int index); + extern void RelationBuildPartitionDesc(Relation relation); extern bool partition_bounds_equal(PartitionKey key, PartitionBoundInfo p1, PartitionBoundInfo p2); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 1d96169..73b8281 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -785,6 +785,7 @@ typedef struct PartitionSpec /* Internal codes for partitioning strategies */ #define PARTITION_STRATEGY_LIST 'l' #define PARTITION_STRATEGY_RANGE 'r' +#define PARTITION_STRATEGY_HASH 'h' /* * PartitionBoundSpec - a partition bound specification @@ -805,6 +806,9 @@ typedef struct PartitionBoundSpec List *lowerdatums; /* List of PartitionRangeDatums */ List *upperdatums; /* List of PartitionRangeDatums */ + /* Partitioning info for HASH strategy: */ + int hashnumber; /* serial number */ + int location; /* token location, or -1 if unknown */ } PartitionBoundSpec; diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out index b6f794e..e434ae2 100644 --- a/src/test/regress/expected/create_table.out +++ b/src/test/regress/expected/create_table.out @@ -340,11 +340,6 @@ CREATE TABLE partitioned ( ) PARTITION BY RANGE (const_func()); ERROR: cannot use constant expression as partition key DROP FUNCTION const_func(); --- only accept "list" and "range" as partitioning strategy -CREATE TABLE partitioned ( - a int -) PARTITION BY HASH (a); -ERROR: unrecognized partitioning strategy "hash" -- specified column must be present in the table CREATE TABLE partitioned ( a int diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql index cb7aa5b..95b0e23 100644 --- a/src/test/regress/sql/create_table.sql +++ b/src/test/regress/sql/create_table.sql @@ -350,11 +350,6 @@ CREATE TABLE partitioned ( ) PARTITION BY RANGE (const_func()); DROP FUNCTION const_func(); --- only accept "list" and "range" as partitioning strategy -CREATE TABLE partitioned ( - a int -) PARTITION BY HASH (a); - -- specified column must be present in the table CREATE TABLE partitioned ( a int