diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /contrib/xml2 | |
parent | Initial commit. (diff) | |
download | postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip |
Adding upstream version 14.5. (refs: upstream/14.5, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/xml2')
-rw-r--r-- | contrib/xml2/.gitignore | 4 | ||||
-rw-r--r-- | contrib/xml2/Makefile | 26 | ||||
-rw-r--r-- | contrib/xml2/expected/xml2.out | 224 | ||||
-rw-r--r-- | contrib/xml2/expected/xml2_1.out | 168 | ||||
-rw-r--r-- | contrib/xml2/sql/xml2.sql | 139 | ||||
-rw-r--r-- | contrib/xml2/xml2--1.0--1.1.sql | 18 | ||||
-rw-r--r-- | contrib/xml2/xml2--1.1.sql | 73 | ||||
-rw-r--r-- | contrib/xml2/xml2.control | 6 | ||||
-rw-r--r-- | contrib/xml2/xpath.c | 845 | ||||
-rw-r--r-- | contrib/xml2/xslt_proc.c | 256 |
10 files changed, 1759 insertions, 0 deletions
diff --git a/contrib/xml2/.gitignore b/contrib/xml2/.gitignore new file mode 100644 index 0000000..5dcb3ff --- /dev/null +++ b/contrib/xml2/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/xml2/Makefile b/contrib/xml2/Makefile new file mode 100644 index 0000000..0d703fe --- /dev/null +++ b/contrib/xml2/Makefile @@ -0,0 +1,26 @@ +# contrib/xml2/Makefile + +MODULE_big = pgxml +OBJS = \ + $(WIN32RES) \ + xpath.o \ + xslt_proc.o + +EXTENSION = xml2 +DATA = xml2--1.1.sql xml2--1.0--1.1.sql +PGFILEDESC = "xml2 - XPath querying and XSLT" + +REGRESS = xml2 + +SHLIB_LINK += $(filter -lxslt, $(LIBS)) -lxml2 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/xml2 +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out new file mode 100644 index 0000000..eba6ae6 --- /dev/null +++ b/contrib/xml2/expected/xml2.out @@ -0,0 +1,224 @@ +CREATE EXTENSION xml2; +select query_to_xml('select 1 as x',true,false,''); + query_to_xml +--------------------------------------------------------------- + <table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+ + + + <row> + + <x>1</x> + + </row> + + + + </table> + + +(1 row) + +select xslt_process( query_to_xml('select x from generate_series(1,5) as +x',true,false,'')::text, +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="xml" indent="yes" /> +<xsl:template match="*"> + <xsl:copy> + <xsl:copy-of select="@*" /> + <xsl:apply-templates /> + </xsl:copy> +</xsl:template> +<xsl:template match="comment()|processing-instruction()"> + <xsl:copy /> +</xsl:template> +</xsl:stylesheet> +$$::text); + xslt_process +--------------------------------------------------------------- + <?xml version="1.0"?> + + <table 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+ + + + <row> + + <x>1</x> + + </row> + + + + <row> + + <x>2</x> + + </row> + + + + <row> + + <x>3</x> + + </row> + + + + <row> + + <x>4</x> + + </row> + + + + <row> + + <x>5</x> + + </row> + + + + </table> + + +(1 row) + +CREATE TABLE xpath_test (id integer NOT NULL, t xml); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +DROP TABLE xpath_test; +CREATE TABLE xpath_test (id integer NOT NULL, t text); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +create table articles (article_id integer, article_xml xml, date_entered date); +insert into articles (article_id, article_xml, date_entered) +values (2, '<article><author>test</author><pages>37</pages></article>', now()); +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + article_id | author | page_count | title +------------+--------+------------+------- + 2 | test | 37 | +(1 row) + +-- this used to fail when invoked a second time +select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; + xslt_process +-------------- + <aaa/> + + +(1 row) + +select 
xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; + xslt_process +-------------- + <aaa/> + + +(1 row) + +create table t1 (id integer, xml_data xml); +insert into t1 (id, xml_data) +values +(1, '<attributes><attribute name="attr_1">Some +Value</attribute></attributes>'); +create index idx_xpath on t1 ( xpath_string +('/attributes/attribute[@name="attr_1"]/text()', xml_data::text)); +SELECT xslt_process('<employee><name>cim</name><age>30</age><pay>400</pay></employee>'::text, $$<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/> + <xsl:strip-space elements="*"/> + <xsl:param name="n1"/> + <xsl:param name="n2"/> + <xsl:param name="n3"/> + <xsl:param name="n4"/> + <xsl:param name="n5" select="'me'"/> + <xsl:template match="*"> + <xsl:element name="samples"> + <xsl:element name="sample"> + <xsl:value-of select="$n1"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n2"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n3"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n4"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n5"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n6"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n7"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n8"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n9"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n10"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n11"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n12"/> + 
</xsl:element> + </xsl:element> + </xsl:template> +</xsl:stylesheet>$$::text, 'n1="v1",n2="v2",n3="v3",n4="v4",n5="v5",n6="v6",n7="v7",n8="v8",n9="v9",n10="v10",n11="v11",n12="v12"'::text); + xslt_process +------------------------ + <samples> + + <sample>v1</sample> + + <sample>v2</sample> + + <sample>v3</sample> + + <sample>v4</sample> + + <sample>v5</sample> + + <sample>v6</sample> + + <sample>v7</sample> + + <sample>v8</sample> + + <sample>v9</sample> + + <sample>v10</sample>+ + <sample>v11</sample>+ + <sample>v12</sample>+ + </samples> + + +(1 row) + +-- possible security exploit +SELECT xslt_process('<xml><foo>Hello from XML</foo></xml>', +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:sax="http://icl.com/saxon" + extension-element-prefixes="sax"> + + <xsl:template match="//foo"> + <sax:output href="0wn3d.txt" method="text"> + <xsl:value-of select="'0wn3d via xml2 extension and libxslt'"/> + <xsl:apply-templates/> + </sax:output> + </xsl:template> +</xsl:stylesheet>$$); +ERROR: failed to apply stylesheet diff --git a/contrib/xml2/expected/xml2_1.out b/contrib/xml2/expected/xml2_1.out new file mode 100644 index 0000000..bac90e5 --- /dev/null +++ b/contrib/xml2/expected/xml2_1.out @@ -0,0 +1,168 @@ +CREATE EXTENSION xml2; +select query_to_xml('select 1 as x',true,false,''); + query_to_xml +--------------------------------------------------------------- + <table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+ + + + <row> + + <x>1</x> + + </row> + + + + </table> + + +(1 row) + +select xslt_process( query_to_xml('select x from generate_series(1,5) as +x',true,false,'')::text, +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="xml" indent="yes" /> +<xsl:template match="*"> + <xsl:copy> + <xsl:copy-of select="@*" /> + <xsl:apply-templates /> + </xsl:copy> +</xsl:template> +<xsl:template match="comment()|processing-instruction()"> + <xsl:copy /> +</xsl:template> 
+</xsl:stylesheet> +$$::text); +ERROR: xslt_process() is not available without libxslt +CREATE TABLE xpath_test (id integer NOT NULL, t xml); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +DROP TABLE xpath_test; +CREATE TABLE xpath_test (id integer NOT NULL, t text); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +create table articles (article_id integer, article_xml xml, date_entered date); +insert into articles (article_id, article_xml, date_entered) +values (2, '<article><author>test</author><pages>37</pages></article>', now()); +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + article_id | author | page_count | title +------------+--------+------------+------- + 2 | test | 37 | +(1 row) + +-- this used to fail when invoked a second time +select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; +ERROR: xslt_process() is not available without libxslt +select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + 
</xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; +ERROR: xslt_process() is not available without libxslt +create table t1 (id integer, xml_data xml); +insert into t1 (id, xml_data) +values +(1, '<attributes><attribute name="attr_1">Some +Value</attribute></attributes>'); +create index idx_xpath on t1 ( xpath_string +('/attributes/attribute[@name="attr_1"]/text()', xml_data::text)); +SELECT xslt_process('<employee><name>cim</name><age>30</age><pay>400</pay></employee>'::text, $$<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/> + <xsl:strip-space elements="*"/> + <xsl:param name="n1"/> + <xsl:param name="n2"/> + <xsl:param name="n3"/> + <xsl:param name="n4"/> + <xsl:param name="n5" select="'me'"/> + <xsl:template match="*"> + <xsl:element name="samples"> + <xsl:element name="sample"> + <xsl:value-of select="$n1"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n2"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n3"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n4"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n5"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n6"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n7"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n8"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n9"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n10"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n11"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n12"/> + </xsl:element> + </xsl:element> + </xsl:template> +</xsl:stylesheet>$$::text, 'n1="v1",n2="v2",n3="v3",n4="v4",n5="v5",n6="v6",n7="v7",n8="v8",n9="v9",n10="v10",n11="v11",n12="v12"'::text); +ERROR: 
xslt_process() is not available without libxslt +-- possible security exploit +SELECT xslt_process('<xml><foo>Hello from XML</foo></xml>', +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:sax="http://icl.com/saxon" + extension-element-prefixes="sax"> + + <xsl:template match="//foo"> + <sax:output href="0wn3d.txt" method="text"> + <xsl:value-of select="'0wn3d via xml2 extension and libxslt'"/> + <xsl:apply-templates/> + </sax:output> + </xsl:template> +</xsl:stylesheet>$$); +ERROR: xslt_process() is not available without libxslt diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql new file mode 100644 index 0000000..ac49cfa --- /dev/null +++ b/contrib/xml2/sql/xml2.sql @@ -0,0 +1,139 @@ +CREATE EXTENSION xml2; + +select query_to_xml('select 1 as x',true,false,''); + +select xslt_process( query_to_xml('select x from generate_series(1,5) as +x',true,false,'')::text, +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="xml" indent="yes" /> +<xsl:template match="*"> + <xsl:copy> + <xsl:copy-of select="@*" /> + <xsl:apply-templates /> + </xsl:copy> +</xsl:template> +<xsl:template match="comment()|processing-instruction()"> + <xsl:copy /> +</xsl:template> +</xsl:stylesheet> +$$::text); + +CREATE TABLE xpath_test (id integer NOT NULL, t xml); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + +DROP TABLE xpath_test; +CREATE TABLE xpath_test (id integer NOT NULL, t text); +INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + +create table articles (article_id integer, 
article_xml xml, date_entered date); +insert into articles (article_id, article_xml, date_entered) +values (2, '<article><author>test</author><pages>37</pages></article>', now()); +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + +-- this used to fail when invoked a second time +select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; + +select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0" +xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="@*|node()"> + <xsl:copy> + <xsl:apply-templates select="@*|node()"/> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>$$)::xml; + +create table t1 (id integer, xml_data xml); +insert into t1 (id, xml_data) +values +(1, '<attributes><attribute name="attr_1">Some +Value</attribute></attributes>'); + +create index idx_xpath on t1 ( xpath_string +('/attributes/attribute[@name="attr_1"]/text()', xml_data::text)); + +SELECT xslt_process('<employee><name>cim</name><age>30</age><pay>400</pay></employee>'::text, $$<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/> + <xsl:strip-space elements="*"/> + <xsl:param name="n1"/> + <xsl:param name="n2"/> + <xsl:param name="n3"/> + <xsl:param name="n4"/> + <xsl:param name="n5" select="'me'"/> + <xsl:template match="*"> + <xsl:element name="samples"> + <xsl:element name="sample"> + <xsl:value-of select="$n1"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n2"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n3"/> + </xsl:element> + 
<xsl:element name="sample"> + <xsl:value-of select="$n4"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n5"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n6"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n7"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n8"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n9"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n10"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n11"/> + </xsl:element> + <xsl:element name="sample"> + <xsl:value-of select="$n12"/> + </xsl:element> + </xsl:element> + </xsl:template> +</xsl:stylesheet>$$::text, 'n1="v1",n2="v2",n3="v3",n4="v4",n5="v5",n6="v6",n7="v7",n8="v8",n9="v9",n10="v10",n11="v11",n12="v12"'::text); + +-- possible security exploit +SELECT xslt_process('<xml><foo>Hello from XML</foo></xml>', +$$<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:sax="http://icl.com/saxon" + extension-element-prefixes="sax"> + + <xsl:template match="//foo"> + <sax:output href="0wn3d.txt" method="text"> + <xsl:value-of select="'0wn3d via xml2 extension and libxslt'"/> + <xsl:apply-templates/> + </sax:output> + </xsl:template> +</xsl:stylesheet>$$); diff --git a/contrib/xml2/xml2--1.0--1.1.sql b/contrib/xml2/xml2--1.0--1.1.sql new file mode 100644 index 0000000..350afb0 --- /dev/null +++ b/contrib/xml2/xml2--1.0--1.1.sql @@ -0,0 +1,18 @@ +/* contrib/xml2/xml2--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION xml2 UPDATE TO '1.1'" to load this file. 
\quit + +ALTER FUNCTION xml_valid(text) PARALLEL SAFE; +ALTER FUNCTION xml_encode_special_chars(text) PARALLEL SAFE; +ALTER FUNCTION xpath_string(text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_nodeset(text, text, text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_number(text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_bool(text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_list(text, text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_list(text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_nodeset(text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_nodeset(text, text, text) PARALLEL SAFE; +ALTER FUNCTION xpath_table(text, text, text, text, text) PARALLEL SAFE; +ALTER FUNCTION xslt_process(text, text, text) PARALLEL SAFE; +ALTER FUNCTION xslt_process(text, text) PARALLEL SAFE; diff --git a/contrib/xml2/xml2--1.1.sql b/contrib/xml2/xml2--1.1.sql new file mode 100644 index 0000000..671372c --- /dev/null +++ b/contrib/xml2/xml2--1.1.sql @@ -0,0 +1,73 @@ +/* contrib/xml2/xml2--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION xml2" to load this file. 
\quit + +--SQL for XML parser + +-- deprecated old name for xml_is_well_formed +CREATE FUNCTION xml_valid(text) RETURNS bool +AS 'xml_is_well_formed' +LANGUAGE INTERNAL STRICT STABLE PARALLEL SAFE; + +CREATE FUNCTION xml_encode_special_chars(text) RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_string(text,text) RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_number(text,text) RETURNS float4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_bool(text,text) RETURNS boolean +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +-- List function + +CREATE FUNCTION xpath_list(text,text,text) RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_list(text,text) RETURNS text +AS 'SELECT xpath_list($1,$2,'','')' +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE; + +-- Wrapper functions for nodeset where no tags needed + +CREATE FUNCTION xpath_nodeset(text,text) +RETURNS text +AS 'SELECT xpath_nodeset($1,$2,'''','''')' +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xpath_nodeset(text,text,text) +RETURNS text +AS 'SELECT xpath_nodeset($1,$2,'''',$3)' +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE; + +-- Table function + +CREATE FUNCTION xpath_table(text,text,text,text,text) +RETURNS setof record +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT STABLE PARALLEL SAFE; + +-- XSLT functions + +CREATE FUNCTION xslt_process(text,text,text) +RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +-- the function checks for the correct argument count +CREATE FUNCTION xslt_process(text,text) +RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; diff --git a/contrib/xml2/xml2.control 
b/contrib/xml2/xml2.control new file mode 100644 index 0000000..ba2c059 --- /dev/null +++ b/contrib/xml2/xml2.control @@ -0,0 +1,6 @@ +# xml2 extension +comment = 'XPath querying and XSLT' +default_version = '1.1' +module_pathname = '$libdir/pgxml' +# XXX do we still need this to be non-relocatable? +relocatable = false diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c new file mode 100644 index 0000000..1e5b71d --- /dev/null +++ b/contrib/xml2/xpath.c @@ -0,0 +1,845 @@ +/* + * contrib/xml2/xpath.c + * + * Parser interface for DOM-based parser (libxml) rather than + * stream-based SAX-type parser + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "executor/spi.h" +#include "fmgr.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/xml.h" + +/* libxml includes */ + +#include <libxml/xpath.h> +#include <libxml/tree.h> +#include <libxml/xmlmemory.h> +#include <libxml/xmlerror.h> +#include <libxml/parserInternals.h> + +PG_MODULE_MAGIC; + +/* exported for use by xslt_proc.c */ + +PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); + +/* workspace for pgxml_xpath() */ + +typedef struct +{ + xmlDocPtr doctree; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr res; +} xpath_workspace; + +/* local declarations */ + +static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, + xmlChar *toptagname, xmlChar *septagname, + xmlChar *plainsep); + +static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, + xmlChar *septag, xmlChar *plainsep); + +static xmlChar *pgxml_texttoxmlchar(text *textstring); + +static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath, + xpath_workspace *workspace); + +static void cleanup_workspace(xpath_workspace *workspace); + + +/* + * Initialize for xml parsing. + * + * As with the underlying pg_xml_init function, calls to this MUST be followed + * by a PG_TRY block that guarantees that pg_xml_done is called. 
+ */ +PgXmlErrorContext * +pgxml_parser_init(PgXmlStrictness strictness) +{ + PgXmlErrorContext *xmlerrcxt; + + /* Set up error handling (we share the core's error handler) */ + xmlerrcxt = pg_xml_init(strictness); + + /* Note: we're assuming an elog cannot be thrown by the following calls */ + + /* Initialize libxml */ + xmlInitParser(); + + xmlSubstituteEntitiesDefault(1); + xmlLoadExtDtdDefaultValue = 1; + + return xmlerrcxt; +} + + +/* + * Returns true if document is well-formed + * + * Note: this has been superseded by a core function. We still have to + * have it in the contrib module so that existing SQL-level references + * to the function won't fail; but in normal usage with up-to-date SQL + * definitions for the contrib module, this won't be called. + */ + +PG_FUNCTION_INFO_V1(xml_is_well_formed); + +Datum +xml_is_well_formed(PG_FUNCTION_ARGS) +{ + text *t = PG_GETARG_TEXT_PP(0); /* document buffer */ + bool result = false; + int32 docsize = VARSIZE_ANY_EXHDR(t); + xmlDocPtr doctree; + PgXmlErrorContext *xmlerrcxt; + + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + doctree = xmlParseMemory((char *) VARDATA_ANY(t), docsize); + + result = (doctree != NULL); + + if (doctree != NULL) + xmlFreeDoc(doctree); + } + PG_CATCH(); + { + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + pg_xml_done(xmlerrcxt, false); + + PG_RETURN_BOOL(result); +} + + +/* Encodes special characters (<, >, &, " and \r) as XML entities */ + +PG_FUNCTION_INFO_V1(xml_encode_special_chars); + +Datum +xml_encode_special_chars(PG_FUNCTION_ARGS) +{ + text *tin = PG_GETARG_TEXT_PP(0); + text *tout; + xmlChar *ts, + *tt; + + ts = pgxml_texttoxmlchar(tin); + + tt = xmlEncodeSpecialChars(NULL, ts); + + pfree(ts); + + tout = cstring_to_text((char *) tt); + + xmlFree(tt); + + PG_RETURN_TEXT_P(tout); +} + +/* + * Function translates a nodeset into a text representation + * + * iterates over each node in the set and calls xmlNodeDump to write it 
to + * an xmlBuffer -from which an xmlChar * string is returned. + * + * each representation is surrounded by <tagname> ... </tagname> + * + * plainsep is an ordinary (not tag) separator - if used, then nodes are + * cast to string as output method + */ +static xmlChar * +pgxmlNodeSetToText(xmlNodeSetPtr nodeset, + xmlChar *toptagname, + xmlChar *septagname, + xmlChar *plainsep) +{ + xmlBufferPtr buf; + xmlChar *result; + int i; + + buf = xmlBufferCreate(); + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, toptagname); + xmlBufferWriteChar(buf, ">"); + } + if (nodeset != NULL) + { + for (i = 0; i < nodeset->nodeNr; i++) + { + if (plainsep != NULL) + { + xmlBufferWriteCHAR(buf, + xmlXPathCastNodeToString(nodeset->nodeTab[i])); + + /* If this isn't the last entry, write the plain sep. */ + if (i < (nodeset->nodeNr) - 1) + xmlBufferWriteChar(buf, (char *) plainsep); + } + else + { + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, septagname); + xmlBufferWriteChar(buf, ">"); + } + xmlNodeDump(buf, + nodeset->nodeTab[i]->doc, + nodeset->nodeTab[i], + 1, 0); + + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, "</"); + xmlBufferWriteCHAR(buf, septagname); + xmlBufferWriteChar(buf, ">"); + } + } + } + } + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, "</"); + xmlBufferWriteCHAR(buf, toptagname); + xmlBufferWriteChar(buf, ">"); + } + result = xmlStrdup(buf->content); + xmlBufferFree(buf); + return result; +} + + +/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter + * into the libxml2 representation + */ +static xmlChar * +pgxml_texttoxmlchar(text *textstring) +{ + return (xmlChar *) text_to_cstring(textstring); +} + +/* Publicly visible XPath functions */ + +/* + * This is a "raw" xpath function. 
Check that it returns child elements + * properly + */ +PG_FUNCTION_INFO_V1(xpath_nodeset); + +Datum +xpath_nodeset(PG_FUNCTION_ARGS) +{ + text *document = PG_GETARG_TEXT_PP(0); + text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ + xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2)); + xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3)); + xmlChar *xpath; + text *xpres; + xmlXPathObjectPtr res; + xpath_workspace workspace; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(document, xpath, &workspace); + + xpres = pgxml_result_to_text(res, toptag, septag, NULL); + + cleanup_workspace(&workspace); + + pfree(xpath); + + if (xpres == NULL) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(xpres); +} + +/* + * The following function is almost identical, but returns the elements in + * a list. + */ +PG_FUNCTION_INFO_V1(xpath_list); + +Datum +xpath_list(PG_FUNCTION_ARGS) +{ + text *document = PG_GETARG_TEXT_PP(0); + text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ + xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2)); + xmlChar *xpath; + text *xpres; + xmlXPathObjectPtr res; + xpath_workspace workspace; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(document, xpath, &workspace); + + xpres = pgxml_result_to_text(res, NULL, NULL, plainsep); + + cleanup_workspace(&workspace); + + pfree(xpath); + + if (xpres == NULL) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(xpres); +} + + +PG_FUNCTION_INFO_V1(xpath_string); + +Datum +xpath_string(PG_FUNCTION_ARGS) +{ + text *document = PG_GETARG_TEXT_PP(0); + text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ + xmlChar *xpath; + int32 pathsize; + text *xpres; + xmlXPathObjectPtr res; + xpath_workspace workspace; + + pathsize = VARSIZE_ANY_EXHDR(xpathsupp); + + /* + * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL + * at end + */ + /* We could try casting to string using the libxml function? 
*/ + + xpath = (xmlChar *) palloc(pathsize + 9); + memcpy((char *) xpath, "string(", 7); + memcpy((char *) (xpath + 7), VARDATA_ANY(xpathsupp), pathsize); + xpath[pathsize + 7] = ')'; + xpath[pathsize + 8] = '\0'; + + res = pgxml_xpath(document, xpath, &workspace); + + xpres = pgxml_result_to_text(res, NULL, NULL, NULL); + + cleanup_workspace(&workspace); + + pfree(xpath); + + if (xpres == NULL) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(xpres); +} + + +PG_FUNCTION_INFO_V1(xpath_number); + +Datum +xpath_number(PG_FUNCTION_ARGS) +{ + text *document = PG_GETARG_TEXT_PP(0); + text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ + xmlChar *xpath; + float4 fRes; + xmlXPathObjectPtr res; + xpath_workspace workspace; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(document, xpath, &workspace); + + pfree(xpath); + + if (res == NULL) + PG_RETURN_NULL(); + + fRes = xmlXPathCastToNumber(res); + + cleanup_workspace(&workspace); + + if (xmlXPathIsNaN(fRes)) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT4(fRes); +} + + +PG_FUNCTION_INFO_V1(xpath_bool); + +Datum +xpath_bool(PG_FUNCTION_ARGS) +{ + text *document = PG_GETARG_TEXT_PP(0); + text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ + xmlChar *xpath; + int bRes; + xmlXPathObjectPtr res; + xpath_workspace workspace; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(document, xpath, &workspace); + + pfree(xpath); + + if (res == NULL) + PG_RETURN_BOOL(false); + + bRes = xmlXPathCastToBoolean(res); + + cleanup_workspace(&workspace); + + PG_RETURN_BOOL(bRes); +} + + + +/* Core function to evaluate XPath query */ + +static xmlXPathObjectPtr +pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) +{ + int32 docsize = VARSIZE_ANY_EXHDR(document); + PgXmlErrorContext *xmlerrcxt; + xmlXPathCompExprPtr comppath; + + workspace->doctree = NULL; + workspace->ctxt = NULL; + workspace->res = NULL; + + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + 
workspace->doctree = xmlParseMemory((char *) VARDATA_ANY(document), + docsize); + if (workspace->doctree != NULL) + { + workspace->ctxt = xmlXPathNewContext(workspace->doctree); + workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); + + /* compile the path */ + comppath = xmlXPathCompile(xpath); + if (comppath == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "XPath Syntax Error"); + + /* Now evaluate the path expression. */ + workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt); + + xmlXPathFreeCompExpr(comppath); + } + } + PG_CATCH(); + { + cleanup_workspace(workspace); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + if (workspace->res == NULL) + cleanup_workspace(workspace); + + pg_xml_done(xmlerrcxt, false); + + return workspace->res; +} + +/* Clean up after processing the result of pgxml_xpath() */ +static void +cleanup_workspace(xpath_workspace *workspace) +{ + if (workspace->res) + xmlXPathFreeObject(workspace->res); + workspace->res = NULL; + if (workspace->ctxt) + xmlXPathFreeContext(workspace->ctxt); + workspace->ctxt = NULL; + if (workspace->doctree) + xmlFreeDoc(workspace->doctree); + workspace->doctree = NULL; +} + +static text * +pgxml_result_to_text(xmlXPathObjectPtr res, + xmlChar *toptag, + xmlChar *septag, + xmlChar *plainsep) +{ + xmlChar *xpresstr; + text *xpres; + + if (res == NULL) + return NULL; + + switch (res->type) + { + case XPATH_NODESET: + xpresstr = pgxmlNodeSetToText(res->nodesetval, + toptag, + septag, plainsep); + break; + + case XPATH_STRING: + xpresstr = xmlStrdup(res->stringval); + break; + + default: + elog(NOTICE, "unsupported XQuery result: %d", res->type); + xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>"); + } + + /* Now convert this result back to text */ + xpres = cstring_to_text((char *) xpresstr); + + /* Free various storage */ + xmlFree(xpresstr); + + return xpres; +} + +/* + * xpath_table is a table function. 
It needs some tidying (as do the + * other functions here! + */ +PG_FUNCTION_INFO_V1(xpath_table); + +Datum +xpath_table(PG_FUNCTION_ARGS) +{ + /* Function parameters */ + char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0)); + char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1)); + char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2)); + char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3)); + char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4)); + + /* SPI (input tuple) support */ + SPITupleTable *tuptable; + HeapTuple spi_tuple; + TupleDesc spi_tupdesc; + + /* Output tuple (tuplestore) support */ + Tuplestorestate *tupstore = NULL; + TupleDesc ret_tupdesc; + HeapTuple ret_tuple; + + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + AttInMetadata *attinmeta; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + + char **values; + xmlChar **xpaths; + char *pos; + const char *pathsep = "|"; + + int numpaths; + int ret; + uint64 proc; + int j; + int rownr; /* For issuing multiple rows from one original + * document */ + bool had_values; /* To determine end of nodeset results */ + StringInfoData query_buf; + PgXmlErrorContext *xmlerrcxt; + volatile xmlDocPtr doctree = NULL; + + /* We only have a valid tuple description in table function mode */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (rsinfo->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table must be called as a table function"))); + + /* + * We want to materialise because it means that we don't have to carry + * libxml2 parser state between invocations of this function + */ + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table requires Materialize mode, but it is not " + "allowed in this context"))); + + /* + * The 
tuplestore must exist in a higher context than this function call + * (per_query_ctx is used) + */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* + * Create the tuplestore - work_mem is the max in-memory size before a + * file is created on disk to hold it. + */ + tupstore = + tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(oldcontext); + + /* get the requested return tuple description */ + ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc); + + /* must have at least one output column (for the pkey) */ + if (ret_tupdesc->natts < 1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table must have at least one output column"))); + + /* + * At the moment we assume that the returned attributes make sense for the + * XPath specified (i.e. we trust the caller). It's not fatal if they get + * it wrong - the input function for the column type will raise an error + * if the path result can't be converted into the correct binary + * representation. + */ + + attinmeta = TupleDescGetAttInMetadata(ret_tupdesc); + + /* Set return mode and allocate value space. */ + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setDesc = ret_tupdesc; + + values = (char **) palloc(ret_tupdesc->natts * sizeof(char *)); + xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *)); + + /* + * Split XPaths. xpathset is a writable CString. 
+ * + * Note that we stop splitting once we've done all needed for tupdesc + */ + numpaths = 0; + pos = xpathset; + while (numpaths < (ret_tupdesc->natts - 1)) + { + xpaths[numpaths++] = (xmlChar *) pos; + pos = strstr(pos, pathsep); + if (pos != NULL) + { + *pos = '\0'; + pos++; + } + else + break; + } + + /* Now build query */ + initStringInfo(&query_buf); + + /* Build initial sql statement */ + appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s", + pkeyfield, + xmlfield, + relname, + condition); + + if ((ret = SPI_connect()) < 0) + elog(ERROR, "xpath_table: SPI_connect returned %d", ret); + + if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT) + elog(ERROR, "xpath_table: SPI execution failed for query %s", + query_buf.data); + + proc = SPI_processed; + tuptable = SPI_tuptable; + spi_tupdesc = tuptable->tupdesc; + + /* Switch out of SPI context */ + MemoryContextSwitchTo(oldcontext); + + /* + * Check that SPI returned correct result. If you put a comma into one of + * the function parameters, this will catch it when the SPI query returns + * e.g. 3 columns. + */ + if (spi_tupdesc->natts != 2) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("expression returning multiple columns is not valid in parameter list"), + errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts))); + } + + /* + * Setup the parser. This should happen after we are done evaluating the + * query, in case it calls functions that set up libxml differently. + */ + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + /* For each row i.e. 
document returned from SPI */ + uint64 i; + + for (i = 0; i < proc; i++) + { + char *pkey; + char *xmldoc; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr res; + xmlChar *resstr; + xmlXPathCompExprPtr comppath; + + /* Extract the row data as C Strings */ + spi_tuple = tuptable->vals[i]; + pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1); + xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2); + + /* + * Clear the values array, so that not-well-formed documents + * return NULL in all columns. Note that this also means that + * spare columns will be NULL. + */ + for (j = 0; j < ret_tupdesc->natts; j++) + values[j] = NULL; + + /* Insert primary key */ + values[0] = pkey; + + /* Parse the document */ + if (xmldoc) + doctree = xmlParseMemory(xmldoc, strlen(xmldoc)); + else /* treat NULL as not well-formed */ + doctree = NULL; + + if (doctree == NULL) + { + /* not well-formed, so output all-NULL tuple */ + ret_tuple = BuildTupleFromCStrings(attinmeta, values); + tuplestore_puttuple(tupstore, ret_tuple); + heap_freetuple(ret_tuple); + } + else + { + /* New loop here - we have to deal with nodeset results */ + rownr = 0; + + do + { + /* Now evaluate the set of xpaths. */ + had_values = false; + for (j = 0; j < numpaths; j++) + { + ctxt = xmlXPathNewContext(doctree); + ctxt->node = xmlDocGetRootElement(doctree); + + /* compile the path */ + comppath = xmlXPathCompile(xpaths[j]); + if (comppath == NULL) + xml_ereport(xmlerrcxt, ERROR, + ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "XPath Syntax Error"); + + /* Now evaluate the path expression. 
*/ + res = xmlXPathCompiledEval(comppath, ctxt); + xmlXPathFreeCompExpr(comppath); + + if (res != NULL) + { + switch (res->type) + { + case XPATH_NODESET: + /* We see if this nodeset has enough nodes */ + if (res->nodesetval != NULL && + rownr < res->nodesetval->nodeNr) + { + resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); + had_values = true; + } + else + resstr = NULL; + + break; + + case XPATH_STRING: + resstr = xmlStrdup(res->stringval); + break; + + default: + elog(NOTICE, "unsupported XQuery result: %d", res->type); + resstr = xmlStrdup((const xmlChar *) "<unsupported/>"); + } + + /* + * Insert this into the appropriate column in the + * result tuple. + */ + values[j + 1] = (char *) resstr; + } + xmlXPathFreeContext(ctxt); + } + + /* Now add the tuple to the output, if there is one. */ + if (had_values) + { + ret_tuple = BuildTupleFromCStrings(attinmeta, values); + tuplestore_puttuple(tupstore, ret_tuple); + heap_freetuple(ret_tuple); + } + + rownr++; + } while (had_values); + } + + if (doctree != NULL) + xmlFreeDoc(doctree); + doctree = NULL; + + if (pkey) + pfree(pkey); + if (xmldoc) + pfree(xmldoc); + } + } + PG_CATCH(); + { + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, false); + + tuplestore_donestoring(tupstore); + + SPI_finish(); + + rsinfo->setResult = tupstore; + + /* + * SFRM_Materialize mode expects us to return a NULL Datum. The actual + * tuples are in our tuplestore and passed back through rsinfo->setResult. + * rsinfo->setDesc is set to the tuple description that we actually used + * to build our tuples with, so the caller can verify we did what it was + * expecting. 
+ */ + return (Datum) 0; +} diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c new file mode 100644 index 0000000..2189bca --- /dev/null +++ b/contrib/xml2/xslt_proc.c @@ -0,0 +1,256 @@ +/* + * contrib/xml2/xslt_proc.c + * + * XSLT processing functions (requiring libxslt) + * + * John Gray, for Torchbox 2003-04-01 + */ +#include "postgres.h" + +#include "executor/spi.h" +#include "fmgr.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/xml.h" + +#ifdef USE_LIBXSLT + +/* libxml includes */ + +#include <libxml/xpath.h> +#include <libxml/tree.h> +#include <libxml/xmlmemory.h> + +/* libxslt includes */ + +#include <libxslt/xslt.h> +#include <libxslt/xsltInternals.h> +#include <libxslt/security.h> +#include <libxslt/transform.h> +#include <libxslt/xsltutils.h> +#endif /* USE_LIBXSLT */ + + +#ifdef USE_LIBXSLT + +/* declarations to come from xpath.c */ +extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); + +/* local defs */ +static const char **parse_params(text *paramstr); +#endif /* USE_LIBXSLT */ + + +PG_FUNCTION_INFO_V1(xslt_process); + +Datum +xslt_process(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXSLT + + text *doct = PG_GETARG_TEXT_PP(0); + text *ssheet = PG_GETARG_TEXT_PP(1); + text *result; + text *paramstr; + const char **params; + PgXmlErrorContext *xmlerrcxt; + volatile xsltStylesheetPtr stylesheet = NULL; + volatile xmlDocPtr doctree = NULL; + volatile xmlDocPtr restree = NULL; + volatile xsltSecurityPrefsPtr xslt_sec_prefs = NULL; + volatile xsltTransformContextPtr xslt_ctxt = NULL; + volatile int resstat = -1; + xmlChar *resstr = NULL; + int reslen = 0; + + if (fcinfo->nargs == 3) + { + paramstr = PG_GETARG_TEXT_PP(2); + params = parse_params(paramstr); + } + else + { + /* No parameters */ + params = (const char **) palloc(sizeof(char *)); + params[0] = NULL; + } + + /* Setup parser */ + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + xmlDocPtr ssdoc; + 
bool xslt_sec_prefs_error; + + /* Parse document */ + doctree = xmlParseMemory((char *) VARDATA_ANY(doct), + VARSIZE_ANY_EXHDR(doct)); + + if (doctree == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "error parsing XML document"); + + /* Same for stylesheet */ + ssdoc = xmlParseMemory((char *) VARDATA_ANY(ssheet), + VARSIZE_ANY_EXHDR(ssheet)); + + if (ssdoc == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "error parsing stylesheet as XML document"); + + /* After this call we need not free ssdoc separately */ + stylesheet = xsltParseStylesheetDoc(ssdoc); + + if (stylesheet == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "failed to parse stylesheet"); + + xslt_ctxt = xsltNewTransformContext(stylesheet, doctree); + + xslt_sec_prefs_error = false; + if ((xslt_sec_prefs = xsltNewSecurityPrefs()) == NULL) + xslt_sec_prefs_error = true; + + if (xsltSetSecurityPrefs(xslt_sec_prefs, XSLT_SECPREF_READ_FILE, + xsltSecurityForbid) != 0) + xslt_sec_prefs_error = true; + if (xsltSetSecurityPrefs(xslt_sec_prefs, XSLT_SECPREF_WRITE_FILE, + xsltSecurityForbid) != 0) + xslt_sec_prefs_error = true; + if (xsltSetSecurityPrefs(xslt_sec_prefs, XSLT_SECPREF_CREATE_DIRECTORY, + xsltSecurityForbid) != 0) + xslt_sec_prefs_error = true; + if (xsltSetSecurityPrefs(xslt_sec_prefs, XSLT_SECPREF_READ_NETWORK, + xsltSecurityForbid) != 0) + xslt_sec_prefs_error = true; + if (xsltSetSecurityPrefs(xslt_sec_prefs, XSLT_SECPREF_WRITE_NETWORK, + xsltSecurityForbid) != 0) + xslt_sec_prefs_error = true; + if (xsltSetCtxtSecurityPrefs(xslt_sec_prefs, xslt_ctxt) != 0) + xslt_sec_prefs_error = true; + + if (xslt_sec_prefs_error) + ereport(ERROR, + (errmsg("could not set libxslt security preferences"))); + + restree = xsltApplyStylesheetUser(stylesheet, doctree, params, + NULL, NULL, xslt_ctxt); + + if (restree == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "failed to apply stylesheet"); 
+ + resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); + } + PG_CATCH(); + { + if (restree != NULL) + xmlFreeDoc(restree); + if (xslt_ctxt != NULL) + xsltFreeTransformContext(xslt_ctxt); + if (xslt_sec_prefs != NULL) + xsltFreeSecurityPrefs(xslt_sec_prefs); + if (stylesheet != NULL) + xsltFreeStylesheet(stylesheet); + if (doctree != NULL) + xmlFreeDoc(doctree); + xsltCleanupGlobals(); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFreeDoc(restree); + xsltFreeTransformContext(xslt_ctxt); + xsltFreeSecurityPrefs(xslt_sec_prefs); + xsltFreeStylesheet(stylesheet); + xmlFreeDoc(doctree); + xsltCleanupGlobals(); + + pg_xml_done(xmlerrcxt, false); + + /* XXX this is pretty dubious, really ought to throw error instead */ + if (resstat < 0) + PG_RETURN_NULL(); + + result = cstring_to_text_with_len((char *) resstr, reslen); + + if (resstr) + xmlFree(resstr); + + PG_RETURN_TEXT_P(result); +#else /* !USE_LIBXSLT */ + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xslt_process() is not available without libxslt"))); + PG_RETURN_NULL(); +#endif /* USE_LIBXSLT */ +} + +#ifdef USE_LIBXSLT + +static const char ** +parse_params(text *paramstr) +{ + char *pos; + char *pstr; + char *nvsep = "="; + char *itsep = ","; + const char **params; + int max_params; + int nparams; + + pstr = text_to_cstring(paramstr); + + max_params = 20; /* must be even! 
*/ + params = (const char **) palloc((max_params + 1) * sizeof(char *)); + nparams = 0; + + pos = pstr; + + while (*pos != '\0') + { + if (nparams >= max_params) + { + max_params *= 2; + params = (const char **) repalloc(params, + (max_params + 1) * sizeof(char *)); + } + params[nparams++] = pos; + pos = strstr(pos, nvsep); + if (pos != NULL) + { + *pos = '\0'; + pos++; + } + else + { + /* No equal sign, so ignore this "parameter" */ + nparams--; + break; + } + + /* since max_params is even, we still have nparams < max_params */ + params[nparams++] = pos; + pos = strstr(pos, itsep); + if (pos != NULL) + { + *pos = '\0'; + pos++; + } + else + break; + } + + /* Add the terminator marker; we left room for it in the palloc's */ + params[nparams] = NULL; + + return params; +} + +#endif /* USE_LIBXSLT */ |