pl/python long-lived allocations in datum->dict transformation
Consider this:
create table arrays as select array[random(), random(), random(),
random(), random(), random()] as a from generate_series(1, 1000000);
create or replace function plpython_outputfunc() returns void as $$
c = plpy.cursor('select a from arrays')
for row in c:
pass
$$ language plpythonu;
When running the function, every datum will get transformed into a
Python dict, which includes calling the type's output function,
resulting in a memory allocation. The memory is allocated in the SPI
context, so it accumulates until the function is finished.
This is annoying for functions that plough through large tables, doing
some calculation. Attached is a patch that does the conversion of
PostgreSQL Datums into Python dict objects in a scratch memory context
that gets reset every time.
Cheers,
Jan
Attachments:
plpython-tuple-to-dict-leak.patchtext/x-diff; name=plpython-tuple-to-dict-leak.patchDownload
diff --git a/src/pl/plpython/plpy_main.c b/src/pl/plpython/plpy_main.c
index ae9d87e..79d7784 100644
*** a/src/pl/plpython/plpy_main.c
--- b/src/pl/plpython/plpy_main.c
***************
*** 12,17 ****
--- 12,18 ----
#include "executor/spi.h"
#include "miscadmin.h"
#include "utils/guc.h"
+ #include "utils/memutils.h"
#include "utils/syscache.h"
#include "plpython.h"
*************** plpython_inline_handler(PG_FUNCTION_ARGS
*** 268,273 ****
--- 269,279 ----
MemSet(&proc, 0, sizeof(PLyProcedure));
proc.pyname = PLy_strdup("__plpython_inline_block");
+ proc.tmp_ctx = AllocSetContextCreate(TopMemoryContext,
+ "PL/Python temporary ctx",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
proc.result.out.d.typoid = VOIDOID;
PG_TRY();
diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c
index 229966a..f539cec 100644
*** a/src/pl/plpython/plpy_procedure.c
--- b/src/pl/plpython/plpy_procedure.c
***************
*** 12,17 ****
--- 12,18 ----
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
+ #include "utils/memutils.h"
#include "utils/syscache.h"
#include "plpython.h"
*************** PLy_procedure_create(HeapTuple procTup,
*** 169,174 ****
--- 170,180 ----
proc->setof = NULL;
proc->src = NULL;
proc->argnames = NULL;
+ proc->tmp_ctx = AllocSetContextCreate(TopMemoryContext,
+ "PL/Python temporary ctx",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
PG_TRY();
{
*************** PLy_procedure_delete(PLyProcedure *proc)
*** 411,416 ****
--- 417,424 ----
PLy_free(proc->src);
if (proc->argnames)
PLy_free(proc->argnames);
+ if (proc->tmp_ctx)
+ MemoryContextDelete(proc->tmp_ctx);
}
/*
diff --git a/src/pl/plpython/plpy_procedure.h b/src/pl/plpython/plpy_procedure.h
index e986c7e..5ee73fa 100644
*** a/src/pl/plpython/plpy_procedure.h
--- b/src/pl/plpython/plpy_procedure.h
*************** typedef struct PLyProcedure
*** 30,35 ****
--- 30,36 ----
PyObject *code; /* compiled procedure code */
PyObject *statics; /* data saved across calls, local scope */
PyObject *globals; /* data saved across calls, global scope */
+ MemoryContext tmp_ctx;
} PLyProcedure;
/* the procedure cache entry */
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index d5cac9f..6f0eb46 100644
*** a/src/pl/plpython/plpy_typeio.c
--- b/src/pl/plpython/plpy_typeio.c
***************
*** 23,28 ****
--- 23,29 ----
#include "plpy_typeio.h"
#include "plpy_elog.h"
+ #include "plpy_procedure.h"
/* I/O function caching */
*************** PLy_output_record_funcs(PLyTypeInfo *arg
*** 258,268 ****
Assert(arg->is_rowtype == 1);
}
PyObject *
PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
{
! PyObject *volatile dict;
! int i;
if (info->is_rowtype != 1)
elog(ERROR, "PLyTypeInfo structure describes a datum");
--- 259,274 ----
Assert(arg->is_rowtype == 1);
}
+ /*
+ * Transform a tuple into a Python dict object. The transformation can result
+ * in memory allocation, so it's done in a scratch memory context.
+ */
PyObject *
PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
{
! PyObject *volatile dict;
! MemoryContext oldcontext;
! int i;
if (info->is_rowtype != 1)
elog(ERROR, "PLyTypeInfo structure describes a datum");
*************** PLyDict_FromTuple(PLyTypeInfo *info, Hea
*** 271,276 ****
--- 277,284 ----
if (dict == NULL)
PLy_elog(ERROR, "could not create new dictionary");
+ oldcontext = MemoryContextSwitchTo(PLy_curr_procedure->tmp_ctx);
+
PG_TRY();
{
for (i = 0; i < info->in.r.natts; i++)
*************** PLyDict_FromTuple(PLyTypeInfo *info, Hea
*** 298,308 ****
--- 306,322 ----
}
PG_CATCH();
{
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextReset(PLy_curr_procedure->tmp_ctx);
+
Py_DECREF(dict);
PG_RE_THROW();
}
PG_END_TRY();
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextReset(PLy_curr_procedure->tmp_ctx);
+
return dict;
}
=?UTF-8?B?SmFuIFVyYmHFhHNraQ==?= <wulczer@wulczer.org> writes:
This is annoying for functions that plough through large tables, doing
some calculation. Attached is a patch that does the conversion of
PostgreSQL Datums into Python dict objects in a scratch memory context
that gets reset every time.
As best I can tell, this patch proposes creating a new, separate context
(chewing up 8KB+) for every plpython procedure that's ever used in a
given session. This cure could easily be worse than the disease as far
as total space consumption is concerned. What's more, it's unclear that
it won't malfunction altogether if the function is used recursively
(ie, what if PLyDict_FromTuple ends up calling the same function again?)
Can't you fix it so that the temp context is associated with a
particular function execution, rather than being "statically" allocated
per-function?
regards, tom lane
On 12/02/12 00:48, Tom Lane wrote:
=?UTF-8?B?SmFuIFVyYmHFhHNraQ==?= <wulczer@wulczer.org> writes:
This is annoying for functions that plough through large tables, doing
some calculation. Attached is a patch that does the conversion of
PostgreSQL Datums into Python dict objects in a scratch memory context
that gets reset every time.As best I can tell, this patch proposes creating a new, separate context
(chewing up 8KB+) for every plpython procedure that's ever used in a
given session. This cure could easily be worse than the disease as far
Yeah, that's not ideal.
What's more, it's unclear that
it won't malfunction altogether if the function is used recursively
(ie, what if PLyDict_FromTuple ends up calling the same function again?)
I was a bit worried about that, but the only place where
PLyDict_FromTuple calls into some other code is when it calls the type's
specific I/O function, which AFAICT can't call back into user code
(except for user-defined C I/O routines). It's not very comfortable, but
I think PLyDict_FromTuple can be allowed to be non-reentrant.
Can't you fix it so that the temp context is associated with a
particular function execution, rather than being "statically" allocated
per-function?
That would be cool, but I failed to easily get a handle on something
that's like the execution context of a PL/Python function... Actually,
if we assume that PLyDict_FromTuple (which is quite a low-level thing)
never calls PL/Python UDFs we could keep a single memory context in
top-level PL/Python memory and pay the overhead once in a session, not
once per function.
OTOH if we want to make it reentrant, some more tinkering would be in order.
Cheers,
Jan
=?UTF-8?B?SmFuIFVyYmHFhHNraQ==?= <wulczer@wulczer.org> writes:
On 12/02/12 00:48, Tom Lane wrote:
What's more, it's unclear that
it won't malfunction altogether if the function is used recursively
(ie, what if PLyDict_FromTuple ends up calling the same function again?)
I was a bit worried about that, but the only place where
PLyDict_FromTuple calls into some other code is when it calls the type's
specific I/O function, which AFAICT can't call back into user code
(except for user-defined C I/O routines). It's not very comfortable, but
I think PLyDict_FromTuple can be allowed to be non-reentrant.
I think that's pretty short-sighted. Even if it's safe today (which
I am not 100% convinced of), there are plenty of foreseeable reasons
why it might^Wwill break in the future.
* There is no reason to think that datatype I/O functions will never
be written in anything but C. People have asked repeatedly for the
ability to write them in higher-level languages. I doubt that would
ever be possible in plpgsql, but with languages that can do
bit-twiddling like plpython or plperl, it seems possible.
* A datatype I/O function, even if written in C, could call user-written
code. See domain_in for example, which can invoke arbitrary processing
via domain constraint checking. If you were proposing to patch
PLyObject_ToTuple rather than the other direction, this patch would be
breakable today. Admittedly the breakage would require some rather
contrived coding ("your domain's constraint check function does
*what*?"), but it would still be a security bug.
* Once we have the ability to associate a temp memory context with
plpython, there will be a temptation to use it for other purposes
besides this one, and it will not be long before such a purpose does
open a recursion risk, even if there's none there today. (Speaking of
which, it sure looks to me like PLyObject_ToDatum, PLyObject_ToTuple,
etc leak memory like there's no tomorrow.)
OTOH if we want to make it reentrant, some more tinkering would be in order.
I think that's in order.
regards, tom lane
On 14/02/12 01:35, Tom Lane wrote:
=?UTF-8?B?SmFuIFVyYmHFhHNraQ==?= <wulczer@wulczer.org> writes:
It's not very comfortable, but
I think PLyDict_FromTuple can be allowed to be non-reentrant.I think that's pretty short-sighted. Even if it's safe today (which
I am not 100% convinced of), there are plenty of foreseeable reasons
why it might^Wwill break in the future.OTOH if we want to make it reentrant, some more tinkering would be in order.
I think that's in order.
Here are the results of the tinkering.
I came up with a stack of context structures that gets pushed when a
PL/Python starts being executed and popped when it returns. At first
they contained just a scratch memory context used by PLyDict_FromTuple.
Then under the premise of confirming the usefulness of introducing such
contexts I removed the global PLy_curr_procedure variable and changed
all users to get the current procedure from the context. It seems to
have worked, so the total count of global variables is unchanged - hooray!
While testing I found one more leak, this time caused by allocating a
structure for caching array type I/O functions and never freeing it.
Attached as separate patch.
Cheers,
Jan
Attachments:
plpython-execution-contexts.patchtext/x-diff; name=plpython-execution-contexts.patchDownload
diff --git a/src/pl/plpython/plpy_cursorobject.c b/src/pl/plpython/plpy_cursorobject.c
index 4226dc7..46930b0 100644
*** a/src/pl/plpython/plpy_cursorobject.c
--- b/src/pl/plpython/plpy_cursorobject.c
***************
*** 14,19 ****
--- 14,20 ----
#include "plpy_cursorobject.h"
#include "plpy_elog.h"
+ #include "plpy_main.h"
#include "plpy_planobject.h"
#include "plpy_procedure.h"
#include "plpy_resultobject.h"
*************** PLy_cursor_query(const char *query)
*** 121,126 ****
--- 122,128 ----
{
SPIPlanPtr plan;
Portal portal;
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
pg_verifymbstr(query, strlen(query), false);
*************** PLy_cursor_query(const char *query)
*** 129,136 ****
elog(ERROR, "SPI_prepare failed: %s",
SPI_result_code_string(SPI_result));
portal = SPI_cursor_open(NULL, plan, NULL, NULL,
! PLy_curr_procedure->fn_readonly);
SPI_freeplan(plan);
if (portal == NULL)
--- 131,140 ----
elog(ERROR, "SPI_prepare failed: %s",
SPI_result_code_string(SPI_result));
+ Assert(exec_ctx->curr_proc != NULL);
+
portal = SPI_cursor_open(NULL, plan, NULL, NULL,
! exec_ctx->curr_proc->fn_readonly);
SPI_freeplan(plan);
if (portal == NULL)
*************** PLy_cursor_plan(PyObject *ob, PyObject *
*** 210,215 ****
--- 214,220 ----
Portal portal;
char *volatile nulls;
volatile int j;
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
if (nargs > 0)
nulls = palloc(nargs * sizeof(char));
*************** PLy_cursor_plan(PyObject *ob, PyObject *
*** 252,259 ****
}
}
portal = SPI_cursor_open(NULL, plan->plan, plan->values, nulls,
! PLy_curr_procedure->fn_readonly);
if (portal == NULL)
elog(ERROR, "SPI_cursor_open() failed: %s",
SPI_result_code_string(SPI_result));
--- 257,266 ----
}
}
+ Assert(exec_ctx->curr_proc != NULL);
+
portal = SPI_cursor_open(NULL, plan->plan, plan->values, nulls,
! exec_ctx->curr_proc->fn_readonly);
if (portal == NULL)
elog(ERROR, "SPI_cursor_open() failed: %s",
SPI_result_code_string(SPI_result));
diff --git a/src/pl/plpython/plpy_elog.c b/src/pl/plpython/plpy_elog.c
index 741980c..9909f23 100644
*** a/src/pl/plpython/plpy_elog.c
--- b/src/pl/plpython/plpy_elog.c
***************
*** 12,17 ****
--- 12,18 ----
#include "plpy_elog.h"
+ #include "plpy_main.h"
#include "plpy_procedure.h"
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 260,265 ****
--- 261,267 ----
char *line;
char *plain_filename;
long plain_lineno;
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
/*
* The second frame points at the internal function, but to mimick
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 270,276 ****
else
fname = PyString_AsString(name);
! proname = PLy_procedure_name(PLy_curr_procedure);
plain_filename = PyString_AsString(filename);
plain_lineno = PyInt_AsLong(lineno);
--- 272,280 ----
else
fname = PyString_AsString(name);
! Assert(exec_ctx->curr_proc != NULL);
!
! proname = PLy_procedure_name(exec_ctx->curr_proc);
plain_filename = PyString_AsString(filename);
plain_lineno = PyInt_AsLong(lineno);
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 287,293 ****
* function code object was compiled with "<string>" as the
* filename
*/
! if (PLy_curr_procedure && plain_filename != NULL &&
strcmp(plain_filename, "<string>") == 0)
{
/*
--- 291,297 ----
* function code object was compiled with "<string>" as the
* filename
*/
! if (exec_ctx->curr_proc && plain_filename != NULL &&
strcmp(plain_filename, "<string>") == 0)
{
/*
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 299,305 ****
* for. But we do not go as far as traceback.py in reading
* the source of imported modules.
*/
! line = get_source_line(PLy_curr_procedure->src, plain_lineno);
if (line)
{
appendStringInfo(&tbstr, "\n %s", line);
--- 303,309 ----
* for. But we do not go as far as traceback.py in reading
* the source of imported modules.
*/
! line = get_source_line(exec_ctx->curr_proc->src, plain_lineno);
if (line)
{
appendStringInfo(&tbstr, "\n %s", line);
diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c
index ecf4996..280d3ed 100644
*** a/src/pl/plpython/plpy_exec.c
--- b/src/pl/plpython/plpy_exec.c
*************** PLy_function_delete_args(PLyProcedure *p
*** 455,461 ****
static void
plpython_return_error_callback(void *arg)
{
! if (PLy_curr_procedure)
errcontext("while creating return value");
}
--- 455,463 ----
static void
plpython_return_error_callback(void *arg)
{
! PLyExecutionContext *exec_ctx = PLy_current_execution_context();
!
! if (exec_ctx->curr_proc)
errcontext("while creating return value");
}
*************** PLy_modify_tuple(PLyProcedure *proc, PyO
*** 781,787 ****
static void
plpython_trigger_error_callback(void *arg)
{
! if (PLy_curr_procedure)
errcontext("while modifying trigger row");
}
--- 783,791 ----
static void
plpython_trigger_error_callback(void *arg)
{
! PLyExecutionContext *exec_ctx = PLy_current_execution_context();
!
! if (exec_ctx->curr_proc)
errcontext("while modifying trigger row");
}
diff --git a/src/pl/plpython/plpy_main.c b/src/pl/plpython/plpy_main.c
index ae9d87e..c0a2583 100644
*** a/src/pl/plpython/plpy_main.c
--- b/src/pl/plpython/plpy_main.c
***************
*** 12,17 ****
--- 12,18 ----
#include "executor/spi.h"
#include "miscadmin.h"
#include "utils/guc.h"
+ #include "utils/memutils.h"
#include "utils/syscache.h"
#include "plpython.h"
*************** static void plpython_error_callback(void
*** 66,75 ****
--- 67,80 ----
static void plpython_inline_error_callback(void *arg);
static void PLy_init_interp(void);
+ static PLyExecutionContext *PLy_push_execution_context(void);
+ static void PLy_pop_execution_context(void);
+
static const int plpython_python_version = PY_MAJOR_VERSION;
/* initialize global variables */
PyObject *PLy_interp_globals = NULL;
+ List *PLy_execution_contexts = NIL;
void
*************** _PG_init(void)
*** 113,118 ****
--- 118,124 ----
init_procedure_caches();
explicit_subtransactions = NIL;
+ PLy_execution_contexts = NIL;
inited = true;
}
*************** Datum
*** 179,192 ****
plpython_call_handler(PG_FUNCTION_ARGS)
{
Datum retval;
- PLyProcedure *save_curr_proc;
ErrorContextCallback plerrcontext;
if (SPI_connect() != SPI_OK_CONNECT)
elog(ERROR, "SPI_connect failed");
- save_curr_proc = PLy_curr_procedure;
-
/*
* Setup error traceback support for ereport()
*/
--- 185,198 ----
plpython_call_handler(PG_FUNCTION_ARGS)
{
Datum retval;
ErrorContextCallback plerrcontext;
+ PLyExecutionContext *exec_ctx;
+
+ exec_ctx = PLy_push_execution_context();
if (SPI_connect() != SPI_OK_CONNECT)
elog(ERROR, "SPI_connect failed");
/*
* Setup error traceback support for ereport()
*/
*************** plpython_call_handler(PG_FUNCTION_ARGS)
*** 200,222 ****
if (CALLED_AS_TRIGGER(fcinfo))
{
! HeapTuple trv;
proc = PLy_procedure_get(fcinfo->flinfo->fn_oid, true);
! PLy_curr_procedure = proc;
trv = PLy_exec_trigger(fcinfo, proc);
retval = PointerGetDatum(trv);
}
else
{
proc = PLy_procedure_get(fcinfo->flinfo->fn_oid, false);
! PLy_curr_procedure = proc;
retval = PLy_exec_function(fcinfo, proc);
}
}
PG_CATCH();
{
! PLy_curr_procedure = save_curr_proc;
PyErr_Clear();
PG_RE_THROW();
}
--- 206,228 ----
if (CALLED_AS_TRIGGER(fcinfo))
{
! HeapTuple trv;
proc = PLy_procedure_get(fcinfo->flinfo->fn_oid, true);
! exec_ctx->curr_proc = proc;
trv = PLy_exec_trigger(fcinfo, proc);
retval = PointerGetDatum(trv);
}
else
{
proc = PLy_procedure_get(fcinfo->flinfo->fn_oid, false);
! exec_ctx->curr_proc = proc;
retval = PLy_exec_function(fcinfo, proc);
}
}
PG_CATCH();
{
! PLy_pop_execution_context();
PyErr_Clear();
PG_RE_THROW();
}
*************** plpython_call_handler(PG_FUNCTION_ARGS)
*** 225,231 ****
/* Pop the error context stack */
error_context_stack = plerrcontext.previous;
! PLy_curr_procedure = save_curr_proc;
return retval;
}
--- 231,237 ----
/* Pop the error context stack */
error_context_stack = plerrcontext.previous;
! PLy_pop_execution_context();
return retval;
}
*************** plpython_inline_handler(PG_FUNCTION_ARGS
*** 244,258 ****
InlineCodeBlock *codeblock = (InlineCodeBlock *) DatumGetPointer(PG_GETARG_DATUM(0));
FunctionCallInfoData fake_fcinfo;
FmgrInfo flinfo;
- PLyProcedure *save_curr_proc;
PLyProcedure proc;
ErrorContextCallback plerrcontext;
if (SPI_connect() != SPI_OK_CONNECT)
elog(ERROR, "SPI_connect failed");
- save_curr_proc = PLy_curr_procedure;
-
/*
* Setup error traceback support for ereport()
*/
--- 250,264 ----
InlineCodeBlock *codeblock = (InlineCodeBlock *) DatumGetPointer(PG_GETARG_DATUM(0));
FunctionCallInfoData fake_fcinfo;
FmgrInfo flinfo;
PLyProcedure proc;
ErrorContextCallback plerrcontext;
+ PLyExecutionContext *exec_ctx;
+
+ exec_ctx = PLy_push_execution_context();
if (SPI_connect() != SPI_OK_CONNECT)
elog(ERROR, "SPI_connect failed");
/*
* Setup error traceback support for ereport()
*/
*************** plpython_inline_handler(PG_FUNCTION_ARGS
*** 273,285 ****
PG_TRY();
{
PLy_procedure_compile(&proc, codeblock->source_text);
! PLy_curr_procedure = &proc;
PLy_exec_function(&fake_fcinfo, &proc);
}
PG_CATCH();
{
PLy_procedure_delete(&proc);
! PLy_curr_procedure = save_curr_proc;
PyErr_Clear();
PG_RE_THROW();
}
--- 279,291 ----
PG_TRY();
{
PLy_procedure_compile(&proc, codeblock->source_text);
! exec_ctx->curr_proc = &proc;
PLy_exec_function(&fake_fcinfo, &proc);
}
PG_CATCH();
{
PLy_procedure_delete(&proc);
! PLy_pop_execution_context();
PyErr_Clear();
PG_RE_THROW();
}
*************** plpython_inline_handler(PG_FUNCTION_ARGS
*** 289,296 ****
/* Pop the error context stack */
error_context_stack = plerrcontext.previous;
!
! PLy_curr_procedure = save_curr_proc;
PG_RETURN_VOID();
}
--- 295,301 ----
/* Pop the error context stack */
error_context_stack = plerrcontext.previous;
! PLy_pop_execution_context();
PG_RETURN_VOID();
}
*************** plpython2_inline_handler(PG_FUNCTION_ARG
*** 303,308 ****
--- 308,322 ----
}
#endif /* PY_MAJOR_VERSION < 3 */
+ PLyExecutionContext *
+ PLy_current_execution_context(void)
+ {
+ if (PLy_execution_contexts == NIL)
+ elog(ERROR, "no Python function is currently executing");
+
+ return linitial(PLy_execution_contexts);
+ }
+
static bool PLy_procedure_is_trigger(Form_pg_proc procStruct)
{
return (procStruct->prorettype == TRIGGEROID ||
*************** static bool PLy_procedure_is_trigger(For
*** 313,321 ****
static void
plpython_error_callback(void *arg)
{
! if (PLy_curr_procedure)
errcontext("PL/Python function \"%s\"",
! PLy_procedure_name(PLy_curr_procedure));
}
static void
--- 327,337 ----
static void
plpython_error_callback(void *arg)
{
! PLyExecutionContext *exec_ctx = PLy_current_execution_context();
!
! if (exec_ctx->curr_proc)
errcontext("PL/Python function \"%s\"",
! PLy_procedure_name(exec_ctx->curr_proc));
}
static void
*************** plpython_inline_error_callback(void *arg
*** 323,325 ****
--- 339,371 ----
{
errcontext("PL/Python anonymous code block");
}
+
+ static PLyExecutionContext *
+ PLy_push_execution_context(void)
+ {
+ PLyExecutionContext *context = palloc(sizeof(PLyExecutionContext));
+
+ context->curr_proc = NULL;
+ context->scratch_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "PL/Python scratch context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ PLy_execution_contexts = lcons(context, PLy_execution_contexts);
+ return context;
+ }
+
+ static void
+ PLy_pop_execution_context(void)
+ {
+ PLyExecutionContext *context;
+
+ if (PLy_execution_contexts == NIL)
+ elog(ERROR, "no Python function is currently executing");
+
+ context = linitial(PLy_execution_contexts);
+ PLy_execution_contexts = list_delete_first(PLy_execution_contexts);
+
+ MemoryContextDelete(context->scratch_ctx);
+ }
diff --git a/src/pl/plpython/plpy_main.h b/src/pl/plpython/plpy_main.h
index a71aead..7935100 100644
*** a/src/pl/plpython/plpy_main.h
--- b/src/pl/plpython/plpy_main.h
***************
*** 10,13 ****
--- 10,27 ----
/* the interpreter's globals dict */
extern PyObject *PLy_interp_globals;
+ /* A stack of PL/Python execution contexts. Each time user-defined Python code
+ * is called, an execution context is created and put on the stack. After the
+ * Python code returns, the context is destroyed. */
+ extern List *PLy_execution_contexts;
+
+ typedef struct PLyExecutionContext
+ {
+ PLyProcedure *curr_proc; /* the currently executing procedure */
+ MemoryContext scratch_ctx; /* a context to fo things like type I/O */
+ } PLyExecutionContext;
+
+ /* Get the current execution context */
+ extern PLyExecutionContext *PLy_current_execution_context(void);
+
#endif /* PLPY_MAIN_H */
diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c
index 229966a..7fb5f00 100644
*** a/src/pl/plpython/plpy_procedure.c
--- b/src/pl/plpython/plpy_procedure.c
***************
*** 22,30 ****
#include "plpy_main.h"
- PLyProcedure *PLy_curr_procedure = NULL;
-
-
static HTAB *PLy_procedure_cache = NULL;
static HTAB *PLy_trigger_cache = NULL;
--- 22,27 ----
diff --git a/src/pl/plpython/plpy_procedure.h b/src/pl/plpython/plpy_procedure.h
index e986c7e..c7405e0 100644
*** a/src/pl/plpython/plpy_procedure.h
--- b/src/pl/plpython/plpy_procedure.h
*************** extern PLyProcedure *PLy_procedure_get(O
*** 45,52 ****
extern void PLy_procedure_compile(PLyProcedure *proc, const char *src);
extern void PLy_procedure_delete(PLyProcedure *proc);
-
- /* currently active plpython function */
- extern PLyProcedure *PLy_curr_procedure;
-
#endif /* PLPY_PROCEDURE_H */
--- 45,48 ----
diff --git a/src/pl/plpython/plpy_spi.c b/src/pl/plpython/plpy_spi.c
index 0d63c4f..2448d9f 100644
*** a/src/pl/plpython/plpy_spi.c
--- b/src/pl/plpython/plpy_spi.c
***************
*** 18,23 ****
--- 18,24 ----
#include "plpy_spi.h"
#include "plpy_elog.h"
+ #include "plpy_main.h"
#include "plpy_planobject.h"
#include "plpy_plpymodule.h"
#include "plpy_procedure.h"
*************** PLy_spi_execute_plan(PyObject *ob, PyObj
*** 238,243 ****
--- 239,245 ----
{
char *volatile nulls;
volatile int j;
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
if (nargs > 0)
nulls = palloc(nargs * sizeof(char));
*************** PLy_spi_execute_plan(PyObject *ob, PyObj
*** 280,287 ****
}
}
rv = SPI_execute_plan(plan->plan, plan->values, nulls,
! PLy_curr_procedure->fn_readonly, limit);
ret = PLy_spi_execute_fetch_result(SPI_tuptable, SPI_processed, rv);
if (nargs > 0)
--- 282,291 ----
}
}
+ Assert(exec_ctx->curr_proc != NULL);
+
rv = SPI_execute_plan(plan->plan, plan->values, nulls,
! exec_ctx->curr_proc->fn_readonly, limit);
ret = PLy_spi_execute_fetch_result(SPI_tuptable, SPI_processed, rv);
if (nargs > 0)
*************** PLy_spi_execute_query(char *query, long
*** 347,354 ****
PG_TRY();
{
pg_verifymbstr(query, strlen(query), false);
! rv = SPI_execute(query, PLy_curr_procedure->fn_readonly, limit);
ret = PLy_spi_execute_fetch_result(SPI_tuptable, SPI_processed, rv);
PLy_spi_subtransaction_commit(oldcontext, oldowner);
--- 351,362 ----
PG_TRY();
{
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
+
+ Assert(exec_ctx->curr_proc != NULL);
+
pg_verifymbstr(query, strlen(query), false);
! rv = SPI_execute(query, exec_ctx->curr_proc->fn_readonly, limit);
ret = PLy_spi_execute_fetch_result(SPI_tuptable, SPI_processed, rv);
PLy_spi_subtransaction_commit(oldcontext, oldowner);
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index d5cac9f..11ba9e8 100644
*** a/src/pl/plpython/plpy_typeio.c
--- b/src/pl/plpython/plpy_typeio.c
***************
*** 23,28 ****
--- 23,29 ----
#include "plpy_typeio.h"
#include "plpy_elog.h"
+ #include "plpy_main.h"
/* I/O function caching */
*************** PyObject *
*** 262,267 ****
--- 263,270 ----
PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
{
PyObject *volatile dict;
+ MemoryContext oldcontext = CurrentMemoryContext;
+ PLyExecutionContext *exec_ctx = PLy_current_execution_context();
int i;
if (info->is_rowtype != 1)
*************** PLyDict_FromTuple(PLyTypeInfo *info, Hea
*** 290,303 ****
--- 293,310 ----
PyDict_SetItemString(dict, key, Py_None);
else
{
+ MemoryContextSwitchTo(exec_ctx->scratch_ctx);
value = (info->in.r.atts[i].func) (&info->in.r.atts[i], vattr);
PyDict_SetItemString(dict, key, value);
Py_DECREF(value);
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextReset(exec_ctx->scratch_ctx);
}
}
}
PG_CATCH();
{
+ MemoryContextSwitchTo(oldcontext);
Py_DECREF(dict);
PG_RE_THROW();
}
plpython-array-leak.patchtext/x-diff; name=plpython-array-leak.patchDownload
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index d5cac9f..914e033 100644
*** a/src/pl/plpython/plpy_typeio.c
--- b/src/pl/plpython/plpy_typeio.c
*************** PLy_typeinfo_dealloc(PLyTypeInfo *arg)
*** 73,78 ****
--- 73,88 ----
{
if (arg->is_rowtype == 1)
{
+ if (arg->in.r.natts > 0)
+ {
+ int i;
+
+ for (i = 0; i < arg->in.r.natts; i++)
+ {
+ if (arg->in.r.atts[i].elm != NULL)
+ PLy_free(arg->in.r.atts[i].elm);
+ }
+ }
if (arg->in.r.atts)
PLy_free(arg->in.r.atts);
if (arg->out.r.atts)
=?UTF-8?B?SmFuIFVyYmHFhHNraQ==?= <wulczer@wulczer.org> writes:
I came up with a stack of context structures that gets pushed when a
PL/Python starts being executed and popped when it returns. At first
they contained just a scratch memory context used by PLyDict_FromTuple.
Then under the premise of confirming the usefulness of introducing such
contexts I removed the global PLy_curr_procedure variable and changed
all users to get the current procedure from the context. It seems to
have worked, so the total count of global variables is unchanged - hooray!
Applied with some adjustments --- mainly, I thought you were being
too incautious about ensuring that the stack got popped once it'd been
pushed. The easiest way to fix that was to do the pushes after the
SPI_connect calls, which required decoupling the behavior from
CurrentMemoryContext, which seemed like a good idea anyway.
While testing I found one more leak, this time caused by allocating a
structure for caching array type I/O functions and never freeing it.
Attached as separate patch.
Applied also, but surely if we're leaking memory from the input
descriptors then we should worry about the output ones too?
I made it do that, but if that's wrong, somebody explain why.
regards, tom lane