Makefile | 2 +- src/cuda_control.c | 2 +- src/datastore.c | 2 +- src/gpuscan.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++------ src/main.c | 10 ++-- 5 files changed, 163 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 90dc685..a41322d 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ PG_VERSION_NUM=$(shell $(PG_CONFIG) --version | awk '{print $$NF}' \ # Source file of CPU portion STROM_OBJS = main.o codegen.o datastore.o aggfuncs.o \ cuda_control.o cuda_program.o cuda_mmgr.o \ - gpuscan.o gpujoin.o gpupreagg.o gpusort.o + gpuscan.o #gpujoin.o gpupreagg.o gpusort.o # Source file of GPU portion CUDA_OBJS = cuda_common.o \ diff --git a/src/cuda_control.c b/src/cuda_control.c index 6b5f74c..a012a89 100644 --- a/src/cuda_control.c +++ b/src/cuda_control.c @@ -1599,7 +1599,7 @@ pgstrom_fetch_gputask(GpuTaskState *gts) { gts->scan_done = true; elog(DEBUG1, "scan done (%s)", - gts->css.methods->CustomName); + gts->css.methods->xnode.extnodename); break; } dlist_push_tail(>s->pending_tasks, >ask->chain); diff --git a/src/datastore.c b/src/datastore.c index 84147cf..d22d846 100644 --- a/src/datastore.c +++ b/src/datastore.c @@ -286,7 +286,7 @@ pgstrom_get_bulkload_density(Plan *child_plan) * bulk-output. So, we need to walk down if child node has bulk- * input. */ - while (pgstrom_plan_is_gpujoin_bulkinput(child_plan)) + while (false) //pgstrom_plan_is_gpujoin_bulkinput(child_plan)) { Plan *curr_plan = child_plan; diff --git a/src/gpuscan.c b/src/gpuscan.c index 0ad6315..318b234 100644 --- a/src/gpuscan.c +++ b/src/gpuscan.c @@ -19,6 +19,7 @@ #include "access/xact.h" #include "catalog/pg_namespace.h" #include "miscadmin.h" +#include "nodes/readfuncs.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -99,6 +100,15 @@ deform_gpuscan_info(Plan *plan) return result; } +typedef struct { + CustomScan cscan; + const char *kern_source; + int32 extra_flags; + List *used_params; + List *used_vars; + List *dev_quals; +} GpuScan; + typedef struct { GpuTask task; @@ -502,7 +512,8 @@ create_gpuscan_plan(PlannerInfo *root, List *clauses, List *custom_children) { - CustomScan *cscan; +// CustomScan *cscan; + GpuScan *gscan; GpuScanInfo gs_info; List *host_quals = NIL; List *dev_quals = NIL; @@ -515,6 +526,8 @@ create_gpuscan_plan(PlannerInfo *root, Assert(rel->rtekind == RTE_RELATION); Assert(custom_children == NIL); + elog(INFO, "best_path => %s", nodeToString(best_path)); + /* * Distribution of clauses into device executable and others. * @@ -543,23 +556,28 @@ create_gpuscan_plan(PlannerInfo *root, /* * Construction of GpuScanPlan node; on top of CustomPlan node */ - cscan = makeNode(CustomScan); - cscan->scan.plan.targetlist = tlist; - cscan->scan.plan.qual = host_quals; - cscan->scan.plan.lefttree = NULL; - cscan->scan.plan.righttree = NULL; - cscan->scan.scanrelid = rel->relid; + gscan = (GpuScan *) newNode(sizeof(GpuScan), T_CustomScan); + gscan->cscan.scan.plan.targetlist = tlist; + gscan->cscan.scan.plan.qual = host_quals; + gscan->cscan.scan.plan.lefttree = NULL; + gscan->cscan.scan.plan.righttree = NULL; + gscan->cscan.scan.scanrelid = rel->relid; gs_info.kern_source = kern_source; gs_info.extra_flags = context.extra_flags | DEVKERNEL_NEEDS_GPUSCAN; gs_info.used_params = context.used_params; gs_info.used_vars = context.used_vars; gs_info.dev_quals = dev_quals; - form_gpuscan_info(cscan, &gs_info); - cscan->flags = best_path->flags; - cscan->methods = &gpuscan_plan_methods; - - return &cscan->scan.plan; + form_gpuscan_info(&gscan->cscan, &gs_info); + gscan->cscan.flags = best_path->flags; + gscan->cscan.methods = &gpuscan_plan_methods; + gscan->kern_source = gs_info.kern_source; + gscan->extra_flags = gs_info.extra_flags; + gscan->used_params = gs_info.used_params; + gscan->used_vars = gs_info.used_vars; + gscan->dev_quals = gs_info.dev_quals; + + return &gscan->cscan.scan.plan; } /* @@ -616,6 +634,100 @@ pgstrom_gpuscan_setup_bulkslot(PlanState *outer_planstate, *p_bulk_slot = css->ss.ss_ScanTupleSlot; } +static void +gpuscan_node_copy(Node *_newnode, const Node *_oldnode) +{ + GpuScan *newnode = (GpuScan *) _newnode; + const GpuScan *oldnode = (const GpuScan *) _oldnode; + + newnode->kern_source = (oldnode->kern_source != NULL + ? pstrdup(oldnode->kern_source) + : NULL); + newnode->extra_flags = oldnode->extra_flags; + newnode->used_params = copyObject(oldnode->used_params); + newnode->used_vars = copyObject(oldnode->used_vars); + newnode->dev_quals = copyObject(oldnode->dev_quals); +} + +static void +_outToken(StringInfo str, const char *s) +{ + if (s == NULL || *s == '\0') + { + appendStringInfoString(str, "<>"); + return; + } + + /* + * Look for characters or patterns that are treated specially by read.c + * (either in pg_strtok() or in nodeRead()), and therefore need a + * protective backslash. + */ + /* These characters only need to be quoted at the start of the string */ + if (*s == '<' || + *s == '\"' || + isdigit((unsigned char) *s) || + ((*s == '+' || *s == '-') && + (isdigit((unsigned char) s[1]) || s[1] == '.'))) + appendStringInfoChar(str, '\\'); + while (*s) + { + /* These chars must be backslashed anywhere in the string */ + if (*s == ' ' || *s == '\n' || *s == '\t' || + *s == '(' || *s == ')' || *s == '{' || *s == '}' || + *s == '\\') + appendStringInfoChar(str, '\\'); + appendStringInfoChar(str, *s++); + } +} + +static void +gpuscan_node_out(StringInfo str, const Node *node) +{ + const GpuScan *gscan = (const GpuScan *) node; + + appendStringInfo(str, " :kern_source "); + _outToken(str, gscan->kern_source); + + appendStringInfo(str, " :extra_flags %u", gscan->extra_flags); + + appendStringInfo(str, " :used_params %s", + nodeToString(gscan->used_params)); + appendStringInfo(str, " :used_vars %s", + nodeToString(gscan->used_vars)); + appendStringInfo(str, " :dev_quals %s", + nodeToString(gscan->dev_quals)); +} + +static void +gpuscan_node_read(Node *node) +{ + GpuScan *gscan = (GpuScan *) node; + char *token; + int length; + + /* :kern_source */ + token = pg_strtok(&length); + token = pg_strtok(&length); + gscan->kern_source = (length == 0 ? NULL : debackslash(token, length)); + /* :extra_flags */ + token = pg_strtok(&length); + token = pg_strtok(&length); + gscan->extra_flags = (unsigned int ) strtoul(token, NULL, 10); + /* :used_params */ + token = pg_strtok(&length); + token = pg_strtok(&length); + gscan->used_params = nodeRead(token, length); + /* :used_vars */ + token = pg_strtok(&length); + token = pg_strtok(&length); + gscan->used_vars = nodeRead(token, length); + /* :dev_quals */ + token = pg_strtok(&length); + token = pg_strtok(&length); + gscan->dev_quals = nodeRead(token, length); +} + /* * gpuscan_create_scan_state * @@ -646,6 +758,15 @@ gpuscan_begin(CustomScanState *node, EState *estate, int eflags) GpuContext *gcontext = NULL; GpuScanState *gss = (GpuScanState *) node; GpuScanInfo *gs_info = deform_gpuscan_info(node->ss.ps.plan); + char *test_str; + Node *test_node; + + /* test for serialization/deserialization */ + elog(INFO, "test-1: %s", nodeToString(node->ss.ps.plan)); + test_str = nodeToString(copyObject(node->ss.ps.plan)); + elog(INFO, "test-2: %s", test_str); + test_node = stringToNode(test_str); + elog(INFO, "test-3: %s", nodeToString(test_node)); /* gpuscan should not have inner/outer plan right now */ Assert(outerPlan(node) == NULL); @@ -962,35 +1083,52 @@ pgstrom_init_gpuscan(void) /* setup path methods */ memset(&gpuscan_path_methods, 0, sizeof(gpuscan_path_methods)); - gpuscan_path_methods.CustomName = "GpuScan"; + gpuscan_path_methods.xnode.extnodename = "GpuScanPath"; + gpuscan_path_methods.xnode.node_size = sizeof(GpuScanPath); gpuscan_path_methods.PlanCustomPath = create_gpuscan_plan; + RegisterExtensibleNodeMethods(&gpuscan_path_methods.xnode); /* setup plan methods */ memset(&gpuscan_plan_methods, 0, sizeof(gpuscan_plan_methods)); - gpuscan_plan_methods.CustomName = "GpuScan"; + gpuscan_plan_methods.xnode.extnodename = "GpuScan"; + gpuscan_plan_methods.xnode.node_size = sizeof(GpuScan); + gpuscan_plan_methods.xnode.nodeCopy = gpuscan_node_copy; + gpuscan_plan_methods.xnode.nodeOut = gpuscan_node_out; + gpuscan_plan_methods.xnode.nodeRead = gpuscan_node_read; gpuscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state; + RegisterExtensibleNodeMethods(&gpuscan_plan_methods.xnode); memset(&bulkscan_plan_methods, 0, sizeof(bulkscan_plan_methods)); - bulkscan_plan_methods.CustomName = "BulkScan"; + bulkscan_plan_methods.xnode.extnodename = "BulkScan"; + bulkscan_plan_methods.xnode.node_size = sizeof(GpuScan); + bulkscan_plan_methods.xnode.nodeCopy = gpuscan_node_copy; + bulkscan_plan_methods.xnode.nodeOut = gpuscan_node_out; + bulkscan_plan_methods.xnode.nodeRead = gpuscan_node_read; bulkscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state; + RegisterExtensibleNodeMethods(&bulkscan_plan_methods.xnode); /* setup exec methods */ memset(&gpuscan_exec_methods, 0, sizeof(gpuscan_exec_methods)); - gpuscan_exec_methods.c.CustomName = "GpuScan"; + gpuscan_exec_methods.c.xnode.extnodename = "GpuScanState"; + gpuscan_exec_methods.c.xnode.node_size = sizeof(GpuScanState); gpuscan_exec_methods.c.BeginCustomScan = gpuscan_begin; gpuscan_exec_methods.c.ExecCustomScan = gpuscan_exec; gpuscan_exec_methods.c.EndCustomScan = gpuscan_end; gpuscan_exec_methods.c.ReScanCustomScan = gpuscan_rescan; gpuscan_exec_methods.c.ExplainCustomScan = gpuscan_explain; gpuscan_exec_methods.ExecCustomBulk = gpuscan_exec_bulk; + RegisterExtensibleNodeMethods(&gpuscan_exec_methods.c.xnode); - bulkscan_exec_methods.c.CustomName = "BulkScan"; + memset(&bulkscan_exec_methods, 0, sizeof(bulkscan_exec_methods)); + bulkscan_exec_methods.c.xnode.extnodename = "BulkScanState"; + bulkscan_exec_methods.c.xnode.node_size = sizeof(GpuScanState); bulkscan_exec_methods.c.BeginCustomScan = gpuscan_begin; bulkscan_exec_methods.c.ExecCustomScan = gpuscan_exec; bulkscan_exec_methods.c.EndCustomScan = gpuscan_end; bulkscan_exec_methods.c.ReScanCustomScan = gpuscan_rescan; bulkscan_exec_methods.c.ExplainCustomScan = gpuscan_explain; bulkscan_exec_methods.ExecCustomBulk = gpuscan_exec_bulk; + RegisterExtensibleNodeMethods(&bulkscan_exec_methods.c.xnode); /* hook registration */ set_rel_pathlist_next = set_rel_pathlist_hook; diff --git a/src/main.c b/src/main.c index e0c2c96..7e2efe9 100644 --- a/src/main.c +++ b/src/main.c @@ -208,7 +208,7 @@ pgstrom_recursive_grafter(PlannedStmt *pstmt, Plan *parent, Plan **p_curr_plan) * Try to inject GpuPreAgg plan if cost of the aggregate plan * is enough expensive to justify preprocess by GPU. */ - pgstrom_try_insert_gpupreagg(pstmt, (Agg *) plan); + //pgstrom_try_insert_gpupreagg(pstmt, (Agg *) plan); break; case T_SubqueryScan: @@ -311,7 +311,7 @@ pgstrom_recursive_grafter(PlannedStmt *pstmt, Plan *parent, Plan **p_curr_plan) * Try to replace Sort node by GpuSort node if cost of * the alternative plan is enough reasonable to replace. */ - pgstrom_try_insert_gpusort(pstmt, p_curr_plan); + //pgstrom_try_insert_gpusort(pstmt, p_curr_plan); break; default: @@ -386,9 +386,9 @@ _PG_init(void) /* registration of custom-scan providers */ pgstrom_init_gpuscan(); - pgstrom_init_gpujoin(); - pgstrom_init_gpupreagg(); - pgstrom_init_gpusort(); +// pgstrom_init_gpujoin(); +// pgstrom_init_gpupreagg(); +// pgstrom_init_gpusort(); /* miscellaneous initializations */ pgstrom_init_misc_guc();