Interesting question

Started by Larry Rosenmanalmost 25 years ago7 messageshackers
Jump to latest
#1Larry Rosenman
ler@lerctr.org

Greetings,
I have made the following table(s),indexes,etc. I wonder if there
is an index (or something else), I can create to make the query use a
"better" plan. (not that it's slow at the moment, but as the table
grows...).

Schema:

--
-- Selected TOC Entries:
--
\connect - neteng
--
-- TOC Entry ID 2 (OID 18735)
--
-- Name: attack_types_id_seq Type: SEQUENCE Owner: neteng
--

CREATE SEQUENCE "attack_types_id_seq" start 1 increment 1 maxvalue 2147483647 minvalue 1 cache 1 ;

--
-- TOC Entry ID 3 (OID 18754)
--
-- Name: attack_types Type: TABLE Owner: neteng
--

CREATE TABLE "attack_types" (
"id" integer DEFAULT nextval('"attack_types_id_seq"'::text) NOT NULL,
"attack_type" character varying(30),
Constraint "attack_types_pkey" Primary Key ("id")
);

--
-- TOC Entry ID 4 (OID 18769)
--
-- Name: attack_db Type: TABLE Owner: neteng
--

CREATE TABLE "attack_db" (
"attack_type" integer,
"start_time" timestamp with time zone,
"end_time" timestamp with time zone,
"src_router" inet,
"input_int" integer,
"output_int" integer,
"src_as" integer,
"src_ip" inet,
"src_port" integer,
"dst_as" integer,
"dst_ip" inet,
"dst_port" integer,
"protocol" integer,
"tos" integer,
"pr_flags" integer,
"pkts" bigint,
"bytes" bigint,
"next_hop" inet
);

--
-- TOC Entry ID 5 (OID 19897)
--
-- Name: protocols Type: TABLE Owner: neteng
--

CREATE TABLE "protocols" (
"proto" integer,
"proto_name" text
);

\connect - ler
--
-- TOC Entry ID 12 (OID 20362)
--
-- Name: "getattack_type" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "getattack_type" (integer) RETURNS text AS 'SELECT CAST(attack_type as text) from attack_types
where id = $1;' LANGUAGE 'sql';

--
-- TOC Entry ID 13 (OID 20462)
--
-- Name: "format_port" (integer,integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_port" (integer,integer) RETURNS text AS 'SELECT CASE
WHEN $1 = 1 THEN trim(to_char(($2 >> 8) & 255, ''09'')) || ''-'' ||
trim(to_char($2 & 255,''09''))
WHEN $1 > 1 THEN trim(to_char($2,''00009''))
END;' LANGUAGE 'sql';

--
-- TOC Entry ID 14 (OID 20508)
--
-- Name: "get_protocol" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "get_protocol" (integer) RETURNS text AS 'SELECT proto_name FROM protocols
WHERE proto = $1;' LANGUAGE 'sql';

--
-- TOC Entry ID 15 (OID 20548)
--
-- Name: "format_protocol" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_protocol" (integer) RETURNS text AS 'SELECT CASE
WHEN get_protocol($1) IS NOT NULL THEN trim(get_protocol($1))
ELSE CAST($1 as text)
END;' LANGUAGE 'sql';

--
-- TOC Entry ID 10 (OID 20816)
--
-- Name: "plpgsql_call_handler" () Type: FUNCTION Owner: ler
--

CREATE FUNCTION "plpgsql_call_handler" () RETURNS opaque AS '/usr/local/pgsql/lib/plpgsql.so', 'plpgsql_call_handler' LANGUAGE 'C';

--
-- TOC Entry ID 11 (OID 20817)
--
-- Name: plpgsql Type: PROCEDURAL LANGUAGE Owner:
--

CREATE TRUSTED PROCEDURAL LANGUAGE 'plpgsql' HANDLER "plpgsql_call_handler" LANCOMPILER 'PL/pgSQL';

--
-- TOC Entry ID 16 (OID 20831)
--
-- Name: "tcp_flags" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "tcp_flags" (integer) RETURNS text AS 'DECLARE flag ALIAS for $1;
ret text;
BEGIN
IF (flag & 128) = 128 THEN ret := ''C'';
ELSE ret := '' '';
END IF;
IF (flag & 64) = 64 THEN ret := ret || ''E'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 32) = 32 THEN ret := ret || ''U'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 16) = 16 THEN ret := ret || ''A'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 8) = 8 THEN ret := ret || ''P'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 4) = 4 THEN ret := ret || ''R'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 2) = 2 THEN ret := ret || ''S'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 1) = 1 THEN ret := ret || ''F'';
ELSE ret := ret || '' '';
END IF;
RETURN ret;
END;' LANGUAGE 'plpgsql';

--
-- TOC Entry ID 6 (OID 21918)
--
-- Name: exempt_ips Type: TABLE Owner: ler
--

CREATE TABLE "exempt_ips" (
"ip" inet
);

--
-- TOC Entry ID 7 (OID 21918)
--
-- Name: exempt_ips Type: ACL Owner:
--

REVOKE ALL on "exempt_ips" from PUBLIC;
GRANT ALL on "exempt_ips" to PUBLIC;
GRANT ALL on "exempt_ips" to "ler";

--
-- TOC Entry ID 17 (OID 22324)
--
-- Name: "format_flags" (integer,integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_flags" (integer,integer) RETURNS text AS 'SELECT CASE
WHEN $1 = 6 THEN tcp_flags($2)
ELSE ''N/A''
END;' LANGUAGE 'sql';

\connect - neteng
--
-- TOC Entry ID 8 (OID 18769)
--
-- Name: "end_index" Type: INDEX Owner: neteng
--

CREATE INDEX "end_index" on "attack_db" using btree ( "end_time" "timestamp_ops" );

--
-- TOC Entry ID 9 (OID 18769)
--
-- Name: "start_index" Type: INDEX Owner: neteng
--

CREATE INDEX "start_index" on "attack_db" using btree ( "start_time" "timestamp_ops" );

--
-- TOC Entry ID 20 (OID 18802)
--
-- Name: "RI_ConstraintTrigger_18801" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER INSERT OR UPDATE ON "attack_db" FROM "attack_types" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_check_ins" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

--
-- TOC Entry ID 18 (OID 18804)
--
-- Name: "RI_ConstraintTrigger_18803" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER DELETE ON "attack_types" FROM "attack_db" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_noaction_del" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

--
-- TOC Entry ID 19 (OID 18806)
--
-- Name: "RI_ConstraintTrigger_18805" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER UPDATE ON "attack_types" FROM "attack_db" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_noaction_upd" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

Query:
EXPLAIN
SELECT to_char(start_time,'MM/DD/YY') as mmddyy,
to_char(start_time,'HH24:MI:SS') as hhmmss,
getattack_type(attack_type) as type,
src_router as router,
input_int as ii,
output_int as oi,
src_as as srcas,host(src_ip) || '/' || masklen(src_ip) || ':' ||
format_port(protocol,src_port) as src_address,
dst_as as dstas,host(dst_ip) || '/' || masklen(dst_ip) || ':' ||
format_port(protocol,dst_port) as dst_address,
format_protocol(protocol) as prot,
tos,format_flags(protocol,pr_flags) as tcpflags,
pkts,bytes,
bytes/pkts as bytes_per_packet,
to_char(end_time,'MM/DD/YY') as end_mmddyy,
to_char(end_time,'HH24:MI:SS') as end_hhmmss,
next_hop
FROM attack_db
WHERE (start_time >= now() - '02:00:00'::interval OR
end_time >= now() - '02:00:00'::interval)
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC; ;

Explain Output:

NOTICE: QUERY PLAN:

Sort (cost=10870.77..10870.77 rows=5259 width=120)
-> Seq Scan on attack_db (cost=0.00..10358.95 rows=5259 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

#2Larry Rosenman
ler@lerctr.org
In reply to: Larry Rosenman (#1)
Re: Interesting question

* Larry Rosenman <ler@lerctr.org> [010518 20:25]:

Greetings,
I have made the following table(s),indexes,etc. I wonder if there
is an index (or something else), I can create to make the query use a
"better" plan. (not that it's slow at the moment, but as the table
grows...).

(Oh, one point, this is 7.2devel...)

Schema:

--
-- Selected TOC Entries:
--
\connect - neteng
--
-- TOC Entry ID 2 (OID 18735)
--
-- Name: attack_types_id_seq Type: SEQUENCE Owner: neteng
--

CREATE SEQUENCE "attack_types_id_seq" start 1 increment 1 maxvalue 2147483647 minvalue 1 cache 1 ;

--
-- TOC Entry ID 3 (OID 18754)
--
-- Name: attack_types Type: TABLE Owner: neteng
--

CREATE TABLE "attack_types" (
"id" integer DEFAULT nextval('"attack_types_id_seq"'::text) NOT NULL,
"attack_type" character varying(30),
Constraint "attack_types_pkey" Primary Key ("id")
);

--
-- TOC Entry ID 4 (OID 18769)
--
-- Name: attack_db Type: TABLE Owner: neteng
--

CREATE TABLE "attack_db" (
"attack_type" integer,
"start_time" timestamp with time zone,
"end_time" timestamp with time zone,
"src_router" inet,
"input_int" integer,
"output_int" integer,
"src_as" integer,
"src_ip" inet,
"src_port" integer,
"dst_as" integer,
"dst_ip" inet,
"dst_port" integer,
"protocol" integer,
"tos" integer,
"pr_flags" integer,
"pkts" bigint,
"bytes" bigint,
"next_hop" inet
);

--
-- TOC Entry ID 5 (OID 19897)
--
-- Name: protocols Type: TABLE Owner: neteng
--

CREATE TABLE "protocols" (
"proto" integer,
"proto_name" text
);

\connect - ler
--
-- TOC Entry ID 12 (OID 20362)
--
-- Name: "getattack_type" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "getattack_type" (integer) RETURNS text AS 'SELECT CAST(attack_type as text) from attack_types
where id = $1;' LANGUAGE 'sql';

--
-- TOC Entry ID 13 (OID 20462)
--
-- Name: "format_port" (integer,integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_port" (integer,integer) RETURNS text AS 'SELECT CASE
WHEN $1 = 1 THEN trim(to_char(($2 >> 8) & 255, ''09'')) || ''-'' ||
trim(to_char($2 & 255,''09''))
WHEN $1 > 1 THEN trim(to_char($2,''00009''))
END;' LANGUAGE 'sql';

--
-- TOC Entry ID 14 (OID 20508)
--
-- Name: "get_protocol" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "get_protocol" (integer) RETURNS text AS 'SELECT proto_name FROM protocols
WHERE proto = $1;' LANGUAGE 'sql';

--
-- TOC Entry ID 15 (OID 20548)
--
-- Name: "format_protocol" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_protocol" (integer) RETURNS text AS 'SELECT CASE
WHEN get_protocol($1) IS NOT NULL THEN trim(get_protocol($1))
ELSE CAST($1 as text)
END;' LANGUAGE 'sql';

--
-- TOC Entry ID 10 (OID 20816)
--
-- Name: "plpgsql_call_handler" () Type: FUNCTION Owner: ler
--

CREATE FUNCTION "plpgsql_call_handler" () RETURNS opaque AS '/usr/local/pgsql/lib/plpgsql.so', 'plpgsql_call_handler' LANGUAGE 'C';

--
-- TOC Entry ID 11 (OID 20817)
--
-- Name: plpgsql Type: PROCEDURAL LANGUAGE Owner:
--

CREATE TRUSTED PROCEDURAL LANGUAGE 'plpgsql' HANDLER "plpgsql_call_handler" LANCOMPILER 'PL/pgSQL';

--
-- TOC Entry ID 16 (OID 20831)
--
-- Name: "tcp_flags" (integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "tcp_flags" (integer) RETURNS text AS 'DECLARE flag ALIAS for $1;
ret text;
BEGIN
IF (flag & 128) = 128 THEN ret := ''C'';
ELSE ret := '' '';
END IF;
IF (flag & 64) = 64 THEN ret := ret || ''E'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 32) = 32 THEN ret := ret || ''U'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 16) = 16 THEN ret := ret || ''A'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 8) = 8 THEN ret := ret || ''P'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 4) = 4 THEN ret := ret || ''R'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 2) = 2 THEN ret := ret || ''S'';
ELSE ret := ret || '' '';
END IF;
IF (flag & 1) = 1 THEN ret := ret || ''F'';
ELSE ret := ret || '' '';
END IF;
RETURN ret;
END;' LANGUAGE 'plpgsql';

--
-- TOC Entry ID 6 (OID 21918)
--
-- Name: exempt_ips Type: TABLE Owner: ler
--

CREATE TABLE "exempt_ips" (
"ip" inet
);

--
-- TOC Entry ID 7 (OID 21918)
--
-- Name: exempt_ips Type: ACL Owner:
--

REVOKE ALL on "exempt_ips" from PUBLIC;
GRANT ALL on "exempt_ips" to PUBLIC;
GRANT ALL on "exempt_ips" to "ler";

--
-- TOC Entry ID 17 (OID 22324)
--
-- Name: "format_flags" (integer,integer) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "format_flags" (integer,integer) RETURNS text AS 'SELECT CASE
WHEN $1 = 6 THEN tcp_flags($2)
ELSE ''N/A''
END;' LANGUAGE 'sql';

\connect - neteng
--
-- TOC Entry ID 8 (OID 18769)
--
-- Name: "end_index" Type: INDEX Owner: neteng
--

CREATE INDEX "end_index" on "attack_db" using btree ( "end_time" "timestamp_ops" );

--
-- TOC Entry ID 9 (OID 18769)
--
-- Name: "start_index" Type: INDEX Owner: neteng
--

CREATE INDEX "start_index" on "attack_db" using btree ( "start_time" "timestamp_ops" );

--
-- TOC Entry ID 20 (OID 18802)
--
-- Name: "RI_ConstraintTrigger_18801" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER INSERT OR UPDATE ON "attack_db" FROM "attack_types" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_check_ins" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

--
-- TOC Entry ID 18 (OID 18804)
--
-- Name: "RI_ConstraintTrigger_18803" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER DELETE ON "attack_types" FROM "attack_db" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_noaction_del" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

--
-- TOC Entry ID 19 (OID 18806)
--
-- Name: "RI_ConstraintTrigger_18805" Type: TRIGGER Owner: neteng
--

CREATE CONSTRAINT TRIGGER "attack_type" AFTER UPDATE ON "attack_types" FROM "attack_db" NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE "RI_FKey_noaction_upd" ('attack_type', 'attack_db', 'attack_types', 'UNSPECIFIED', 'attack_type', 'id');

Query:
EXPLAIN
SELECT to_char(start_time,'MM/DD/YY') as mmddyy,
to_char(start_time,'HH24:MI:SS') as hhmmss,
getattack_type(attack_type) as type,
src_router as router,
input_int as ii,
output_int as oi,
src_as as srcas,host(src_ip) || '/' || masklen(src_ip) || ':' ||
format_port(protocol,src_port) as src_address,
dst_as as dstas,host(dst_ip) || '/' || masklen(dst_ip) || ':' ||
format_port(protocol,dst_port) as dst_address,
format_protocol(protocol) as prot,
tos,format_flags(protocol,pr_flags) as tcpflags,
pkts,bytes,
bytes/pkts as bytes_per_packet,
to_char(end_time,'MM/DD/YY') as end_mmddyy,
to_char(end_time,'HH24:MI:SS') as end_hhmmss,
next_hop
FROM attack_db
WHERE (start_time >= now() - '02:00:00'::interval OR
end_time >= now() - '02:00:00'::interval)
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC; ;

Explain Output:

NOTICE: QUERY PLAN:

Sort (cost=10870.77..10870.77 rows=5259 width=120)
-> Seq Scan on attack_db (cost=0.00..10358.95 rows=5259 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

---------------------------(end of broadcast)---------------------------
TIP 3: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to majordomo@postgresql.org so that your
message can get through to the mailing list cleanly

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Larry Rosenman (#1)
Re: Interesting question

Larry Rosenman <ler@lerctr.org> writes:

EXPLAIN
SELECT ...
FROM attack_db
WHERE (start_time >= now() - '02:00:00'::interval OR
end_time >= now() - '02:00:00'::interval)
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC;

NOTICE: QUERY PLAN:

Sort (cost=10870.77..10870.77 rows=5259 width=120)
-> Seq Scan on attack_db (cost=0.00..10358.95 rows=5259 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

Making use of the indexes on start_time and end_time would be a good
thing. The reason it's not doing that is it doesn't think that the
expressions "now() - '02:00:00'::interval" reduce to constants. We
may have a proper solution for that by the time 7.2 comes out, but
in the meantime you could fake it with a function that hides the
noncacheable function and operator --- see previous discussions of
this identical issue in the archives.

The NOT INs are pretty ugly too (and do you need the host() conversion
there? Seems like a waste of cycles...). You might be able to live
with that if the timestamp condition will always be pretty restrictive,
but otherwise they'll be a no go. Consider NOT EXISTS with an index
on exempt_ips(ip).

regards, tom lane

#4Larry Rosenman
ler@lerctr.org
In reply to: Tom Lane (#3)
Re: Interesting question

* Tom Lane <tgl@sss.pgh.pa.us> [010518 21:09]:

Larry Rosenman <ler@lerctr.org> writes:

EXPLAIN
SELECT ...
FROM attack_db
WHERE (start_time >= now() - '02:00:00'::interval OR
end_time >= now() - '02:00:00'::interval)
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC;

NOTICE: QUERY PLAN:

Sort (cost=10870.77..10870.77 rows=5259 width=120)
-> Seq Scan on attack_db (cost=0.00..10358.95 rows=5259 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

Making use of the indexes on start_time and end_time would be a good
thing. The reason it's not doing that is it doesn't think that the
expressions "now() - '02:00:00'::interval" reduce to constants. We
may have a proper solution for that by the time 7.2 comes out, but
in the meantime you could fake it with a function that hides the
noncacheable function and operator --- see previous discussions of
this identical issue in the archives.

OK. What would you suggest for the function? I'd like the
'02:00:00'::interval to be a variable somehow to change the
interval we're searching. What fills the table is a daemon that is
looking at the netflow data, and when a packet that matches one of the
attack profiles comes along, it does an insert into attack_db.

The NOT INs are pretty ugly too (and do you need the host() conversion
there? Seems like a waste of cycles...). You might be able to live
with that if the timestamp condition will always be pretty restrictive,
but otherwise they'll be a no go. Consider NOT EXISTS with an index
on exempt_ips(ip).

Yes, because the masks will probably be different each time (this is
from netflow data from my cisco's). The exempt IP's table is, at the
moment 4 ip's, so that's quick anyway.

regards, tom lane

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

#5Larry Rosenman
ler@lerctr.org
In reply to: Larry Rosenman (#4)
Re: Interesting question

* Larry Rosenman <ler@lerctr.org> [010518 21:48]:

* Tom Lane <tgl@sss.pgh.pa.us> [010518 21:09]:

Larry Rosenman <ler@lerctr.org> writes:

EXPLAIN
SELECT ...
FROM attack_db
WHERE (start_time >= now() - '02:00:00'::interval OR
end_time >= now() - '02:00:00'::interval)
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC;

NOTICE: QUERY PLAN:

Sort (cost=10870.77..10870.77 rows=5259 width=120)
-> Seq Scan on attack_db (cost=0.00..10358.95 rows=5259 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

Making use of the indexes on start_time and end_time would be a good
thing. The reason it's not doing that is it doesn't think that the
expressions "now() - '02:00:00'::interval" reduce to constants. We
may have a proper solution for that by the time 7.2 comes out, but
in the meantime you could fake it with a function that hides the
noncacheable function and operator --- see previous discussions of
this identical issue in the archives.

OK. What would you suggest for the function? I'd like the
'02:00:00'::interval to be a variable somehow to change the
interval we're searching. What fills the table is a daemon that is
looking at the netflow data, and when a packet that matches one of the
attack profiles comes along, it does an insert into attack_db.

I tried the following function:

--
-- TOC Entry ID 15 (OID 35180)
--
-- Name: "nowminus" (interval) Type: FUNCTION Owner: ler
--

CREATE FUNCTION "nowminus" (interval) RETURNS timestamp with time zone AS 'SELECT now() - $1;' LANGUAGE 'sql';

and the following query:

EXPLAIN
SELECT to_char(start_time,'MM/DD/YY') as mmddyy,
to_char(start_time,'HH24:MI:SS') as hhmmss,
getattack_type(attack_type) as type,
src_router as router,
input_int as ii,
output_int as oi,
src_as as srcas,host(src_ip) || '/' || masklen(src_ip) || ':' ||
format_port(protocol,src_port) as src_address,
dst_as as dstas,host(dst_ip) || '/' || masklen(dst_ip) || ':' ||
format_port(protocol,dst_port) as dst_address,
format_protocol(protocol) as prot,
tos,format_flags(protocol,pr_flags) as tcpflags,
pkts,bytes,
bytes/pkts as bytes_per_packet,
to_char(end_time,'MM/DD/YY') as end_mmddyy,
to_char(end_time,'HH24:MI:SS') as end_hhmmss,
next_hop
FROM attack_db
WHERE (start_time >= nowminus('02:00:00'::interval) OR
end_time >= nowminus('02:00:00'::interval) )
AND host(src_ip) NOT IN (select host(ip) from exempt_ips)
AND host(dst_ip) NOT IN (select host(ip) from exempt_ips)
ORDER BY bytes DESC; ;

And got the following plan:

NOTICE: QUERY PLAN:

Sort (cost=11313.95..11313.95 rows=5497 width=120)
-> Seq Scan on attack_db (cost=0.00..10777.58 rows=5497 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

EXPLAIN

The NOT INs are pretty ugly too (and do you need the host() conversion
there? Seems like a waste of cycles...). You might be able to live
with that if the timestamp condition will always be pretty restrictive,
but otherwise they'll be a no go. Consider NOT EXISTS with an index
on exempt_ips(ip).

Yes, because the masks will probably be different each time (this is
from netflow data from my cisco's). The exempt IP's table is, at the
moment 4 ip's, so that's quick anyway.

regards, tom lane

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to majordomo@postgresql.org

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749

#6Tom Lane
tgl@sss.pgh.pa.us
In reply to: Larry Rosenman (#5)
Re: Interesting question

Larry Rosenman <ler@lerctr.org> writes:

CREATE FUNCTION "nowminus" (interval) RETURNS timestamp with time zone AS 'SELECT now() - $1;' LANGUAGE 'sql';

Right idea, but you need to mark it iscachable.

regards, tom lane

#7Larry Rosenman
ler@lerctr.org
In reply to: Tom Lane (#6)
Re: Interesting question

* Tom Lane <tgl@sss.pgh.pa.us> [010518 22:39]:

Larry Rosenman <ler@lerctr.org> writes:

CREATE FUNCTION "nowminus" (interval) RETURNS timestamp with time zone AS 'SELECT now() - $1;' LANGUAGE 'sql';

Right idea, but you need to mark it iscachable.

Aha:

Same query, with nowminus marked iscachable:
NOTICE: QUERY PLAN:

Sort (cost=513.69..513.69 rows=447 width=120)
-> Index Scan using start_index, end_index on attack_db (cost=0.00..494.01 rows=447 width=120)
SubPlan
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)
-> Seq Scan on exempt_ips (cost=0.00..1.04 rows=4 width=12)

EXPLAIN

regards, tom lane

--
Larry Rosenman http://www.lerctr.org/~ler
Phone: +1 972-414-9812 E-Mail: ler@lerctr.org
US Mail: 1905 Steamboat Springs Drive, Garland, TX 75044-6749