diff options
Diffstat (limited to '')
-rw-r--r-- | src/backend/utils/adt/xml.c | 4766 |
1 files changed, 4766 insertions, 0 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c new file mode 100644 index 0000000..f90a942 --- /dev/null +++ b/src/backend/utils/adt/xml.c @@ -0,0 +1,4766 @@ +/*------------------------------------------------------------------------- + * + * xml.c + * XML data type support. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/utils/adt/xml.c + * + *------------------------------------------------------------------------- + */ + +/* + * Generally, XML type support is only available when libxml use was + * configured during the build. But even if that is not done, the + * type and all the functions are available, but most of them will + * fail. For one thing, this avoids having to manage variant catalog + * installations. But it also has nice effects such as that you can + * dump a database containing XML type data even if the server is not + * linked with libxml. Thus, make sure xml_out() works even if nothing + * else does. + */ + +/* + * Notes on memory management: + * + * Sometimes libxml allocates global structures in the hope that it can reuse + * them later on. This makes it impractical to change the xmlMemSetup + * functions on-the-fly; that is likely to lead to trying to pfree() chunks + * allocated with malloc() or vice versa. Since libxml might be used by + * loadable modules, eg libperl, our only safe choices are to change the + * functions at postmaster/backend launch or not at all. Since we'd rather + * not activate libxml in sessions that might never use it, the latter choice + * is the preferred one. However, for debugging purposes it can be awfully + * handy to constrain libxml's allocations to be done in a specific palloc + * context, where they're easy to track. Therefore there is code here that + * can be enabled in debug builds to redirect libxml's allocations into a + * special context LibxmlContext. 
It's not recommended to turn this on in + * a production build because of the possibility of bad interactions with + * external modules. + */ +/* #define USE_LIBXMLCONTEXT */ + +#include "postgres.h" + +#ifdef USE_LIBXML +#include <libxml/chvalid.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/tree.h> +#include <libxml/uri.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlwriter.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> + +/* + * We used to check for xmlStructuredErrorContext via a configure test; but + * that doesn't work on Windows, so instead use this grottier method of + * testing the library version number. + */ +#if LIBXML_VERSION >= 20704 +#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1 +#endif +#endif /* USE_LIBXML */ + +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "catalog/pg_class.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "executor/spi.h" +#include "executor/tablefunc.h" +#include "fmgr.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "nodes/nodeFuncs.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/xml.h" + + +/* GUC variables */ +int xmlbinary; +int xmloption; + +#ifdef USE_LIBXML + +/* random number to identify PgXmlErrorContext */ +#define ERRCXT_MAGIC 68275028 + +/* + * State for one pg_xml_init()/pg_xml_done() pair: pg_xml_init() allocates + * and fills it, pg_xml_done() restores the saved libxml settings from it + * and then frees it. + */ +struct PgXmlErrorContext +{ + /* ERRCXT_MAGIC while valid; pg_xml_done() resets it to 0 */ + int magic; + /* strictness argument passed to pg_xml_init */ + PgXmlStrictness strictness; + /* current error status and accumulated message, if any */ + bool err_occurred; + StringInfoData err_buf; + /* previous libxml error handling state (saved by pg_xml_init) */ + xmlStructuredErrorFunc
saved_errfunc; + void *saved_errcxt; + /* previous libxml entity handler (saved by pg_xml_init) */ + xmlExternalEntityLoader saved_entityfunc; +}; + +static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt); +static void xml_errorHandler(void *data, xmlErrorPtr error); +static void xml_ereport_by_code(int level, int sqlcode, + const char *msg, int errcode); +static void chopStringInfoNewlines(StringInfo str); +static void appendStringInfoLineSeparator(StringInfo str); + +#ifdef USE_LIBXMLCONTEXT + +static MemoryContext LibxmlContext = NULL; + +static void xml_memory_init(void); +static void *xml_palloc(size_t size); +static void *xml_repalloc(void *ptr, size_t size); +static void xml_pfree(void *ptr); +static char *xml_pstrdup(const char *string); +#endif /* USE_LIBXMLCONTEXT */ + +static xmlChar *xml_text2xmlChar(text *in); +static int parse_xml_decl(const xmlChar *str, size_t *lenp, + xmlChar **version, xmlChar **encoding, int *standalone); +static bool print_xml_decl(StringInfo buf, const xmlChar *version, + pg_enc encoding, int standalone); +static bool xml_doctype_in_content(const xmlChar *str); +static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding); +static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); +static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, + ArrayBuildState *astate, + PgXmlErrorContext *xmlerrcxt); +static xmlChar *pg_xmlCharStrndup(const char *str, size_t len); +#endif /* USE_LIBXML */ + +static void xmldata_root_element_start(StringInfo result, const char *eltname, + const char *xmlschema, const char *targetns, + bool top_level); +static void xmldata_root_element_end(StringInfo result, const char *eltname); +static StringInfo query_to_xml_internal(const char *query, char *tablename, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level); +static const char
*map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, + bool nulls, bool tableforest, const char *targetns); +static const char *map_sql_schema_to_xmlschema_types(Oid nspid, + List *relid_list, bool nulls, + bool tableforest, const char *targetns); +static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list, + bool nulls, bool tableforest, + const char *targetns); +static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod); +static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list); +static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod); +static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, + char *tablename, bool nulls, bool tableforest, + const char *targetns, bool top_level); + +/* XMLTABLE support */ +#ifdef USE_LIBXML +/* random number to identify XmlTableContext */ +#define XMLTABLE_CONTEXT_MAGIC 46922182 +/* + * Working state for the XmlTable* routines declared below. + * NOTE(review): the code that fills in and reads these fields lies outside + * this part of the file; confirm the role of each field there rather than + * relying on its name alone. + */ +typedef struct XmlTableBuilderData +{ + int magic; + int natts; + long int row_count; + PgXmlErrorContext *xmlerrcxt; + xmlParserCtxtPtr ctxt; + xmlDocPtr doc; + xmlXPathContextPtr xpathcxt; + xmlXPathCompExprPtr xpathcomp; + xmlXPathObjectPtr xpathobj; + xmlXPathCompExprPtr *xpathscomp; +} XmlTableBuilderData; +#endif + +static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts); +static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value); +static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name, + const char *uri); +static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path); +static void XmlTableSetColumnFilter(struct TableFuncScanState *state, + const char *path, int colnum); +static bool XmlTableFetchRow(struct TableFuncScanState *state); +static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum, + Oid typid, int32 typmod, bool *isnull); +static void XmlTableDestroyOpaque(struct TableFuncScanState *state); + +/* + * Table of the XmlTable* routines declared above. The initializer is + * positional, so its entries must stay in the member order of + * TableFuncRoutine. + */ +const TableFuncRoutine XmlTableRoutine = +{ +
XmlTableInitOpaque, + XmlTableSetDocument, + XmlTableSetNamespace, + XmlTableSetRowFilter, + XmlTableSetColumnFilter, + XmlTableFetchRow, + XmlTableGetValue, + XmlTableDestroyOpaque +}; + +/* + * Error raised by the entry points in this file when the server was built + * without libxml (USE_LIBXML not defined). + */ +#define NO_XML_SUPPORT() \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("unsupported XML feature"), \ + errdetail("This functionality requires the server to be built with libxml support."), \ + errhint("You need to rebuild PostgreSQL using %s.", "--with-libxml"))) + + +/* from SQL/XML:2008 section 4.9 */ +#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema" +#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance" +#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml" + + +#ifdef USE_LIBXML + +/* + * Look up the PostgreSQL encoding id for an encoding name given as xmlChar. + * Raises ERROR (invalid parameter value) when pg_char_to_encoding() returns + * a negative value; otherwise returns the id. + */ +static int +xmlChar_to_encoding(const xmlChar *encoding_name) +{ + int encoding = pg_char_to_encoding((const char *) encoding_name); + + if (encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid encoding name \"%s\"", + (const char *) encoding_name))); + return encoding; +} +#endif + + +/* + * xml_in uses a plain C string to VARDATA conversion, so for the time being + * we use the conversion function for the text datatype. + * + * This is only acceptable so long as xmltype and text use the same + * representation. + */ +Datum +xml_in(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *s = PG_GETARG_CSTRING(0); + xmltype *vardata; + xmlDocPtr doc; + + vardata = (xmltype *) cstring_to_text(s); + + /* + * Parse the data to check if it is well-formed XML data. xml_parse is + * assumed to raise ERROR if parsing fails, so reaching the next + * statement means the input was accepted. + */ + doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding()); + /* the parsed tree was only needed for the check */ + xmlFreeDoc(doc); + + PG_RETURN_XML_P(vardata); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +#define PG_XML_DEFAULT_VERSION "1.0" + + +/* + * xml_out_internal uses a plain VARDATA to C string conversion, so for the + * time being we use the conversion function for the text datatype.
+ * + * This is only acceptable so long as xmltype and text use the same + * representation. + */ +static char * +xml_out_internal(xmltype *x, pg_enc target_encoding) +{ + char *str = text_to_cstring((text *) x); + +#ifdef USE_LIBXML + size_t len = strlen(str); + xmlChar *version; + int standalone; + int res_code; + + /* a result of 0 means the leading declaration, if any, was parsed OK */ + if ((res_code = parse_xml_decl((xmlChar *) str, + &len, &version, NULL, &standalone)) == 0) + { + StringInfoData buf; + + initStringInfo(&buf); + + if (!print_xml_decl(&buf, version, target_encoding, standalone)) + { + /* + * If we are not going to produce an XML declaration, eat a single + * newline in the original string to prevent empty first lines in + * the output. + */ + if (*(str + len) == '\n') + len += 1; + } + appendStringInfoString(&buf, str + len); + + pfree(str); + + return buf.data; + } + + /* parse failed: report a WARNING only, then return the text unchanged */ + xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR, + "could not parse XML declaration in stored value", + res_code); +#endif + return str; +} + + +/* + * xml_out: return the value as a C string, produced by xml_out_internal + * with target encoding 0. + */ +Datum +xml_out(PG_FUNCTION_ARGS) +{ + xmltype *x = PG_GETARG_XML_P(0); + + /* + * xml_out removes the encoding property in all cases. This is because we + * cannot control from here whether the datum will be converted to a + * different client encoding, so we'd do more harm than good by including + * it. + */ + PG_RETURN_CSTRING(xml_out_internal(x, 0)); +} + + +/* + * xml_recv: build an xml value from the bytes remaining in the message + * buffer passed as argument 0. In a build without libxml it raises the + * NO_XML_SUPPORT error instead. + */ +Datum +xml_recv(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + xmltype *result; + char *str; + char *newstr; + int nbytes; + xmlDocPtr doc; + xmlChar *encodingStr = NULL; + int encoding; + + /* + * Read the data in raw format. We don't know yet what the encoding is, as + * that information is embedded in the xml declaration; so we have to + * parse that before converting to server encoding. + */ + nbytes = buf->len - buf->cursor; + str = (char *) pq_getmsgbytes(buf, nbytes); + + /* + * We need a null-terminated string to pass to parse_xml_decl().
Rather + * than make a separate copy, make the temporary result one byte bigger + * than it needs to be. + */ + result = palloc(nbytes + 1 + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + memcpy(VARDATA(result), str, nbytes); + str = VARDATA(result); + str[nbytes] = '\0'; + + /* only the encoding is wanted here; the return code is not checked */ + parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL); + + /* + * If encoding wasn't explicitly specified in the XML header, treat it as + * UTF-8, as that's the default in XML. This is different from xml_in(), + * where the input has to go through the normal client to server encoding + * conversion. + */ + encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8; + + /* + * Parse the data to check if it is well-formed XML data. Assume that + * xml_parse will throw ERROR if not. + */ + doc = xml_parse(result, xmloption, true, encoding); + xmlFreeDoc(doc); + + /* Now that we know what we're dealing with, convert to server encoding */ + newstr = pg_any_to_server(str, nbytes, encoding); + + /* a different pointer means a converted copy was made; rebuild from it */ + if (newstr != str) + { + pfree(result); + result = (xmltype *) cstring_to_text(newstr); + pfree(newstr); + } + + PG_RETURN_XML_P(result); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +/* + * xml_send: send the value as text, using the string that xml_out_internal + * produces for the current client encoding. + */ +Datum +xml_send(PG_FUNCTION_ARGS) +{ + xmltype *x = PG_GETARG_XML_P(0); + char *outval; + StringInfoData buf; + + /* + * xml_out_internal doesn't convert the encoding, it just prints the right + * declaration. pq_sendtext will do the conversion.
+ */ + outval = xml_out_internal(x, pg_get_client_encoding()); + + pq_begintypsend(&buf); + pq_sendtext(&buf, outval, strlen(outval)); + pfree(outval); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +#ifdef USE_LIBXML +/* Append the data bytes of a text value (without its header) to str. */ +static void +appendStringInfoText(StringInfo str, const text *t) +{ + appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); +} +#endif + + +/* Make an xmltype value from the buf->len bytes held in a StringInfo. */ +static xmltype * +stringinfo_to_xmltype(StringInfo buf) +{ + return (xmltype *) cstring_to_text_with_len(buf->data, buf->len); +} + + +/* Make an xmltype value from a NUL-terminated C string. */ +static xmltype * +cstring_to_xmltype(const char *string) +{ + return (xmltype *) cstring_to_text(string); +} + + +#ifdef USE_LIBXML +/* Make an xmltype value from the current content of a libxml buffer. */ +static xmltype * +xmlBuffer_to_xmltype(xmlBufferPtr buf) +{ + return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf), + xmlBufferLength(buf)); +} +#endif + + +/* + * xmlcomment: return the text argument wrapped as an XML comment. + * + * The text must not contain two adjacent hyphens nor end with a hyphen; + * otherwise ERRCODE_INVALID_XML_COMMENT is raised. In a build without + * libxml the NO_XML_SUPPORT error is raised instead. + */ +Datum +xmlcomment(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *arg = PG_GETARG_TEXT_PP(0); + char *argdata = VARDATA_ANY(arg); + int len = VARSIZE_ANY_EXHDR(arg); + StringInfoData buf; + int i; + + /* check for "--" in string or "-" at the end */ + for (i = 1; i < len; i++) + { + if (argdata[i] == '-' && argdata[i - 1] == '-') + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + } + if (len > 0 && argdata[len - 1] == '-') + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + + initStringInfo(&buf); + appendStringInfoString(&buf, "<!--"); + appendStringInfoText(&buf, arg); + appendStringInfoString(&buf, "-->"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + + +/* + * TODO: xmlconcat needs to merge the notations and unparsed entities + * of the argument values. Not very important in practice, though.
+ */ +xmltype * +xmlconcat(List *args) +{ +#ifdef USE_LIBXML + int global_standalone = 1; + xmlChar *global_version = NULL; + bool global_version_no_value = false; + StringInfoData buf; + ListCell *v; + + initStringInfo(&buf); + foreach(v, args) + { + xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v))); + size_t len; + xmlChar *version; + int standalone; + char *str; + + len = VARSIZE(x) - VARHDRSZ; + str = text_to_cstring((text *) x); + + parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone); + + /* + * The combined standalone value ends up -1 if any argument has no + * value (negative), else 0 if any argument has 0, else 1. + */ + if (standalone == 0 && global_standalone == 1) + global_standalone = 0; + if (standalone < 0) + global_standalone = -1; + + /* keep a version only when every argument carries the same one */ + if (!version) + global_version_no_value = true; + else if (!global_version) + global_version = version; + else if (xmlStrcmp(version, global_version) != 0) + global_version_no_value = true; + + /* append the argument text that follows its first len bytes */ + appendStringInfoString(&buf, str + len); + pfree(str); + } + + /* a version or standalone value survived: put a declaration in front */ + if (!global_version_no_value || global_standalone >= 0) + { + StringInfoData buf2; + + initStringInfo(&buf2); + + print_xml_decl(&buf2, + (!global_version_no_value) ? global_version : NULL, + 0, + global_standalone); + + appendBinaryStringInfo(&buf2, buf.data, buf.len); + buf = buf2; + } + + return stringinfo_to_xmltype(&buf); +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * XMLAGG support + */ +Datum +xmlconcat2(PG_FUNCTION_ARGS) +{ + /* a NULL argument is skipped; the result is NULL only if both are NULL */ + if (PG_ARGISNULL(0)) + { + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + else + PG_RETURN_XML_P(PG_GETARG_XML_P(1)); + } + else if (PG_ARGISNULL(1)) + PG_RETURN_XML_P(PG_GETARG_XML_P(0)); + else + PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0), + PG_GETARG_XML_P(1)))); +} + + +/* texttoxml: check the text with xmlparse under the current xmloption. */ +Datum +texttoxml(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_XML_P(xmlparse(data, xmloption, true)); +} + + +/* xmltotext: return the xml argument as text. */ +Datum +xmltotext(PG_FUNCTION_ARGS) +{ + xmltype *data = PG_GETARG_XML_P(0); + + /* It's actually binary compatible.
*/ + PG_RETURN_TEXT_P((text *) data); +} + + +/* + * Return data as text. With XMLOPTION_DOCUMENT the value must satisfy + * xml_is_document(); otherwise ERRCODE_NOT_AN_XML_DOCUMENT is raised. + */ +text * +xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg) +{ + if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data)) + ereport(ERROR, + (errcode(ERRCODE_NOT_AN_XML_DOCUMENT), + errmsg("not an XML document"))); + + /* It's actually binary compatible, save for the above check. */ + return (text *) data; +} + + +/* + * xmlelement: build an element named xexpr->name. + * + * named_argvalue and named_argnull supply the attribute values (the + * attribute names come from xexpr->arg_names); argvalue and argnull supply + * the content values. In a build without libxml the NO_XML_SUPPORT error + * is raised instead. + */ +xmltype * +xmlelement(XmlExpr *xexpr, + Datum *named_argvalue, bool *named_argnull, + Datum *argvalue, bool *argnull) +{ +#ifdef USE_LIBXML + xmltype *result; + List *named_arg_strings; + List *arg_strings; + int i; + ListCell *arg; + ListCell *narg; + PgXmlErrorContext *xmlerrcxt; + /* volatile because they are set inside PG_TRY and used in PG_CATCH */ + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; + + /* + * All arguments are already evaluated, and their values are passed in the + * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids + * issues if one of the arguments involves a call to some other function + * or subsystem that wants to use libxml on its own terms. We examine the + * original XmlExpr to identify the numbers and types of the arguments.
+ */ + /* + * Attribute values: a NULL is kept as a NULL list entry so that the list + * stays parallel to xexpr->arg_names in the forboth loop below. + */ + named_arg_strings = NIL; + i = 0; + foreach(arg, xexpr->named_args) + { + Expr *e = (Expr *) lfirst(arg); + char *str; + + if (named_argnull[i]) + str = NULL; + else + str = map_sql_value_to_xml_value(named_argvalue[i], + exprType((Node *) e), + false); + named_arg_strings = lappend(named_arg_strings, str); + i++; + } + + /* content values: only the non-NULL ones are collected */ + arg_strings = NIL; + i = 0; + foreach(arg, xexpr->args) + { + Expr *e = (Expr *) lfirst(arg); + char *str; + + /* here we can just forget NULL elements immediately */ + if (!argnull[i]) + { + str = map_sql_value_to_xml_value(argvalue[i], + exprType((Node *) e), + true); + arg_strings = lappend(arg_strings, str); + } + i++; + } + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlTextWriter"); + + xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + + /* one attribute per non-NULL named argument */ + forboth(arg, named_arg_strings, narg, xexpr->arg_names) + { + char *str = (char *) lfirst(arg); + char *argname = strVal(lfirst(narg)); + + if (str) + xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str); + } + + /* content strings are written as-is (raw), in argument order */ + foreach(arg, arg_strings) + { + char *str = (char *) lfirst(arg); + + xmlTextWriterWriteRaw(writer, (xmlChar *) str); + } + + xmlTextWriterEndElement(writer); + + /* we MUST do this now to flush data out to the buffer ...
*/ + xmlFreeTextWriter(writer); + writer = NULL; + + result = xmlBuffer_to_xmltype(buf); + } + PG_CATCH(); + { + /* free whatever libxml objects exist, then undo pg_xml_init */ + if (writer) + xmlFreeTextWriter(writer); + if (buf) + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + /* normal exit: result was already built from buf above */ + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, false); + + return result; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * xmlparse: run xml_parse on data purely as a check. The parsed tree is + * freed and the input datum itself is returned. In a build without libxml + * the NO_XML_SUPPORT error is raised instead. + */ +xmltype * +xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) +{ +#ifdef USE_LIBXML + xmlDocPtr doc; + + doc = xml_parse(data, xmloption_arg, preserve_whitespace, + GetDatabaseEncoding()); + xmlFreeDoc(doc); + + return (xmltype *) data; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * xmlpi: build a processing instruction for the given target and optional + * argument text. + * + * A target equal to xml in any letter case is rejected. *result_is_null is + * set from arg_is_null, and NULL is returned when it is true. + */ +xmltype * +xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null) +{ +#ifdef USE_LIBXML + xmltype *result; + StringInfoData buf; + + if (pg_strcasecmp(target, "xml") == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), /* really */ + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction target name cannot be \"%s\".", target))); + + /* + * Following the SQL standard, the null check comes after the syntax check + * above.
+ */ + *result_is_null = arg_is_null; + if (*result_is_null) + return NULL; + + initStringInfo(&buf); + + appendStringInfo(&buf, "<?%s", target); + + if (arg != NULL) + { + char *string; + + string = text_to_cstring(arg); + if (strstr(string, "?>") != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction cannot contain \"?>\"."))); + + /* one separating space, then the argument minus its leading spaces */ + appendStringInfoChar(&buf, ' '); + appendStringInfoString(&buf, string + strspn(string, " ")); + pfree(string); + } + appendStringInfoString(&buf, "?>"); + + result = stringinfo_to_xmltype(&buf); + pfree(buf.data); + return result; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * xmlroot: return data with its XML declaration rebuilt from the given + * version and standalone settings. + * + * version may be NULL, in which case no version is passed on to + * print_xml_decl. standalone is one of the XML_STANDALONE_* values; with + * XML_STANDALONE_OMITTED the value parsed from data is kept. + */ +xmltype * +xmlroot(xmltype *data, text *version, int standalone) +{ +#ifdef USE_LIBXML + char *str; + size_t len; + xmlChar *orig_version; + int orig_standalone; + StringInfoData buf; + + len = VARSIZE(data) - VARHDRSZ; + str = text_to_cstring((text *) data); + + parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone); + + /* the version parsed above is always replaced: new value or none */ + if (version) + orig_version = xml_text2xmlChar(version); + else + orig_version = NULL; + + switch (standalone) + { + case XML_STANDALONE_YES: + orig_standalone = 1; + break; + case XML_STANDALONE_NO: + orig_standalone = 0; + break; + case XML_STANDALONE_NO_VALUE: + orig_standalone = -1; + break; + case XML_STANDALONE_OMITTED: + /* leave original value */ + break; + } + + initStringInfo(&buf); + print_xml_decl(&buf, orig_version, 0, orig_standalone); + appendStringInfoString(&buf, str + len); + + return stringinfo_to_xmltype(&buf); +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * Validate document (given as string) against DTD (given as external link) + * + * This has been removed because it is a security hole: unprivileged users + * should not be able to use Postgres to fetch arbitrary external files, + * which unfortunately is exactly what libxml is willing to do with the DTD + *
parameter. + */ +Datum +xmlvalidate(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xmlvalidate is not implemented"))); + return 0; +} + + +/* + * xml_is_document: return true if arg parses with XMLOPTION_DOCUMENT, and + * false if that parse fails with ERRCODE_INVALID_XML_DOCUMENT. Any other + * error is re-thrown. In a build without libxml the NO_XML_SUPPORT error + * is raised instead. + */ +bool +xml_is_document(xmltype *arg) +{ +#ifdef USE_LIBXML + bool result; + volatile xmlDocPtr doc = NULL; + MemoryContext ccxt = CurrentMemoryContext; + + /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ + PG_TRY(); + { + doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding()); + result = true; + } + PG_CATCH(); + { + ErrorData *errdata; + MemoryContext ecxt; + + /* copy the error data while in the context that was current at entry */ + ecxt = MemoryContextSwitchTo(ccxt); + errdata = CopyErrorData(); + if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT) + { + FlushErrorState(); + result = false; + } + else + { + /* not the expected error: restore the context and propagate */ + MemoryContextSwitchTo(ecxt); + PG_RE_THROW(); + } + } + PG_END_TRY(); + + if (doc) + xmlFreeDoc(doc); + + return result; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return false; +#endif /* not USE_LIBXML */ +} + + +#ifdef USE_LIBXML + +/* + * pg_xml_init_library --- set up for use of libxml + * + * This should be called by each function that is about to use libxml + * facilities but doesn't require error handling. It initializes libxml + * and verifies compatibility with the loaded libxml version. These are + * once-per-session activities. + * + * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and + * check) + */ +void +pg_xml_init_library(void) +{ + static bool first_time = true; + + if (first_time) + { + /* Stuff we need to do only once per session */ + + /* + * Currently, we have no pure UTF-8 support for internals -- check if + * we can work.
+ */ + /* + * NOTE(review): the %u conversions below receive (int) arguments. The + * values are tiny so this is harmless, but the signedness differs. + */ + if (sizeof(char) != sizeof(xmlChar)) + ereport(ERROR, + (errmsg("could not initialize XML library"), + errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", + (int) sizeof(char), (int) sizeof(xmlChar)))); + +#ifdef USE_LIBXMLCONTEXT + /* Set up libxml's memory allocation our way */ + xml_memory_init(); +#endif + + /* Check library compatibility */ + LIBXML_TEST_VERSION; + + /* skipped if the ereport(ERROR) above fired, so setup is retried later */ + first_time = false; + } +} + +/* + * pg_xml_init --- set up for use of libxml and register an error handler + * + * This should be called by each function that is about to use libxml + * facilities and requires error handling. It initializes libxml with + * pg_xml_init_library() and establishes our libxml error handler. + * + * strictness determines which errors are reported and which are ignored. + * + * Calls to this function MUST be followed by a PG_TRY block that guarantees + * that pg_xml_done() is called during either normal or error exit. + * + * This is exported for use by contrib/xml2, as well as other code that might + * wish to share use of this module's libxml error handler. + */ +PgXmlErrorContext * +pg_xml_init(PgXmlStrictness strictness) +{ + PgXmlErrorContext *errcxt; + void *new_errcxt; + + /* Do one-time setup if needed */ + pg_xml_init_library(); + + /* Create error handling context structure */ + errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext)); + errcxt->magic = ERRCXT_MAGIC; + errcxt->strictness = strictness; + errcxt->err_occurred = false; + initStringInfo(&errcxt->err_buf); + + /* + * Save original error handler and install ours. libxml originally didn't + * distinguish between the contexts for generic and for structured error + * handlers. If we're using an old libxml version, we must thus save the + * generic error context, even though we're using a structured error + * handler.
+ */ + errcxt->saved_errfunc = xmlStructuredError; + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + errcxt->saved_errcxt = xmlStructuredErrorContext; +#else + errcxt->saved_errcxt = xmlGenericErrorContext; +#endif + + /* install our handler, with errcxt registered as its context pointer */ + xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler); + + /* + * Verify that xmlSetStructuredErrorFunc set the context variable we + * expected it to. If not, the error context pointer we just saved is not + * the correct thing to restore, and since that leaves us without a way to + * restore the context in pg_xml_done, we must fail. + * + * The only known situation in which this test fails is if we compile with + * headers from a libxml2 that doesn't track the structured error context + * separately (< 2.7.4), but at runtime use a version that does, or vice + * versa. The libxml2 authors did not treat that change as constituting + * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library + * fails to protect us from this. + */ + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + new_errcxt = xmlStructuredErrorContext; +#else + new_errcxt = xmlGenericErrorContext; +#endif + + if (new_errcxt != (void *) errcxt) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not set up XML error handler"), + errhint("This probably indicates that the version of libxml2" + " being used is not compatible with the libxml2" + " header files that PostgreSQL was built with."))); + + /* + * Also, install an entity loader to prevent unwanted fetches of external + * files and URLs. + */ + errcxt->saved_entityfunc = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(xmlPgEntityLoader); + + /* the caller must later pass this struct to pg_xml_done() */ + return errcxt; +} + + +/* + * pg_xml_done --- restore previous libxml error handling + * + * Resets libxml's global error-handling state and external entity loader + * to what they were before pg_xml_init() was called, then frees the + * context struct. + * + * This routine verifies that all pending errors have been dealt with + * (in assert-enabled builds, anyway).
+ */ +/* + * errcxt is the struct returned by pg_xml_init(); it is freed here. + * isError should be true when this is called during error recovery, which + * relaxes the pending-error assertion below. + */ +void +pg_xml_done(PgXmlErrorContext *errcxt, bool isError) +{ + void *cur_errcxt; + + /* An assert seems like enough protection here */ + Assert(errcxt->magic == ERRCXT_MAGIC); + + /* + * In a normal exit, there should be no un-handled libxml errors. But we + * shouldn't try to enforce this during error recovery, since the longjmp + * could have been thrown before xml_ereport had a chance to run. + */ + Assert(!errcxt->err_occurred || isError); + + /* + * Check that libxml's global state is correct, warn if not. This is a + * real test and not an Assert because it has a higher probability of + * happening. + */ +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + cur_errcxt = xmlStructuredErrorContext; +#else + cur_errcxt = xmlGenericErrorContext; +#endif + + if (cur_errcxt != (void *) errcxt) + elog(WARNING, "libxml error handling state is out of sync with xml.c"); + + /* Restore the saved error handler (with its context) and entity loader */ + xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); + xmlSetExternalEntityLoader(errcxt->saved_entityfunc); + + /* + * Mark the struct as invalid, just in case somebody somehow manages to + * call xml_errorHandler or xml_ereport with it. + */ + errcxt->magic = 0; + + /* Release memory */ + pfree(errcxt->err_buf.data); + pfree(errcxt); +} + + +/* + * pg_xml_error_occurred() --- test the error flag + * + * Returns the err_occurred field of the given context. + */ +bool +pg_xml_error_occurred(PgXmlErrorContext *errcxt) +{ + return errcxt->err_occurred; +} + + +/* + * SQL/XML allows storing "XML documents" or "XML content". "XML + * documents" are specified by the XML specification and are parsed + * easily by libxml. "XML content" is specified by SQL/XML as the + * production "XMLDecl? content". But libxml can only parse the + * "content" part, so we have to parse the XML declaration ourselves + * to complete this.
+ */ + +/* Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is blank. */ +#define CHECK_XML_SPACE(p) \ + do { \ + if (!xmlIsBlank_ch(*(p))) \ + return XML_ERR_SPACE_REQUIRED; \ + } while (0) + +/* Advance p past any run of blank characters. */ +#define SKIP_XML_SPACE(p) \ + while (xmlIsBlank_ch(*(p))) (p)++ + +/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ +/* Beware of multiple evaluations of argument! */ +#define PG_XMLISNAMECHAR(c) \ + (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \ + || xmlIsDigit_ch(c) \ + || c == '.' || c == '-' || c == '_' || c == ':' \ + || xmlIsCombiningQ(c) \ + || xmlIsExtender_ch(c)) + +/* + * pnstrdup, but deal with xmlChar not char; len is measured in xmlChars. + * The result is palloc'd and always zero-terminated. + */ +static xmlChar * +xml_pnstrdup(const xmlChar *str, size_t len) +{ + xmlChar *result; + + result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + memcpy(result, str, len * sizeof(xmlChar)); + result[len] = 0; + return result; +} + +/* Like xml_pnstrdup, except input is char*; len is a count of chars */ +static xmlChar * +pg_xmlCharStrndup(const char *str, size_t len) +{ + xmlChar *result; + + result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + /* copies len bytes, which relies on char and xmlChar having equal size */ + memcpy(result, str, len); + result[len] = '\0'; + + return result; +} + +/* + * Copy xmlChar string to PostgreSQL-owned memory, freeing the input. + * + * The input xmlChar is freed regardless of success of the copy. + * A NULL input yields a NULL result. + */ +static char * +xml_pstrdup_and_free(xmlChar *str) +{ + char *result; + + if (str) + { + /* PG_FINALLY frees the libxml string even when pstrdup raises an error */ + PG_TRY(); + { + result = pstrdup((char *) str); + } + PG_FINALLY(); + { + xmlFree(str); + } + PG_END_TRY(); + } + else + result = NULL; + + return result; +} + +/* + * str is the null-terminated input string. Remaining arguments are + * output arguments; each can be NULL if value is not wanted. + * version and encoding are returned as locally-palloc'd strings. + * Result is 0 if OK, an error code if not. + */ +static int +parse_xml_decl(const xmlChar *str, size_t *lenp, + xmlChar **version, xmlChar **encoding, int *standalone) +{ + const xmlChar *p; + const xmlChar *save_p; + size_t len; + int utf8char; + int utf8len; + + /* + * Only initialize libxml.
We don't need error handling here, but we do + * need to make sure libxml is initialized before calling any of its + * functions. Note that this is safe (and a no-op) if caller has already + * done pg_xml_init(). + */ + pg_xml_init_library(); + + /* Initialize output arguments to "not present" */ + if (version) + *version = NULL; + if (encoding) + *encoding = NULL; + if (standalone) + *standalone = -1; + + p = str; + + if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0) + goto finished; + + /* + * If next char is a name char, it's a PI like <?xml-stylesheet ...?> + * rather than an XMLDecl, so we have done what we came to do and found no + * XMLDecl. + * + * We need an input length value for xmlGetUTF8Char, but there's no need + * to count the whole document size, so use strnlen not strlen. + */ + utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN); + utf8char = xmlGetUTF8Char(p + 5, &utf8len); + if (PG_XMLISNAMECHAR(utf8char)) + goto finished; + + p += 5; + + /* version */ + CHECK_XML_SPACE(p); + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0) + return XML_ERR_VERSION_MISSING; + p += 7; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_VERSION_MISSING; + p += 1; + SKIP_XML_SPACE(p); + + if (*p == '\'' || *p == '"') + { + const xmlChar *q; + + q = xmlStrchr(p + 1, *p); + if (!q) + return XML_ERR_VERSION_MISSING; + + if (version) + *version = xml_pnstrdup(p + 1, q - p - 1); + p = q + 1; + } + else + return XML_ERR_VERSION_MISSING; + + /* encoding */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0) + { + CHECK_XML_SPACE(save_p); + p += 8; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_MISSING_ENCODING; + p += 1; + SKIP_XML_SPACE(p); + + if (*p == '\'' || *p == '"') + { + const xmlChar *q; + + q = xmlStrchr(p + 1, *p); + if (!q) + return XML_ERR_MISSING_ENCODING; + + if (encoding) + *encoding = xml_pnstrdup(p + 1, q - p - 1); + p = q + 1; + } + else + return XML_ERR_MISSING_ENCODING; 
+ } + else + { + p = save_p; + } + + /* standalone */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0) + { + CHECK_XML_SPACE(save_p); + p += 10; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_STANDALONE_VALUE; + p += 1; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 || + xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0) + { + if (standalone) + *standalone = 1; + p += 5; + } + else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 || + xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0) + { + if (standalone) + *standalone = 0; + p += 4; + } + else + return XML_ERR_STANDALONE_VALUE; + } + else + { + p = save_p; + } + + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0) + return XML_ERR_XMLDECL_NOT_FINISHED; + p += 2; + +finished: + len = p - str; + + for (p = str; p < str + len; p++) + if (*p > 127) + return XML_ERR_INVALID_CHAR; + + if (lenp) + *lenp = len; + + return XML_ERR_OK; +} + + +/* + * Write an XML declaration. On output, we adjust the XML declaration + * as follows. (These rules are the moral equivalent of the clause + * "Serialization of an XML value" in the SQL standard.) + * + * We try to avoid generating an XML declaration if possible. This is + * so that you don't get trivial things like xml '<foo/>' resulting in + * '<?xml version="1.0"?><foo/>', which would surely be annoying. We + * must provide a declaration if the standalone property is specified + * or if we include an encoding declaration. If we have a + * declaration, we must specify a version (XML requires this). + * Otherwise we only make a declaration if the version is not "1.0", + * which is the default version specified in SQL:2003. 
+ */ +static bool +print_xml_decl(StringInfo buf, const xmlChar *version, + pg_enc encoding, int standalone) +{ + if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0) + || (encoding && encoding != PG_UTF8) + || standalone != -1) + { + appendStringInfoString(buf, "<?xml"); + + if (version) + appendStringInfo(buf, " version=\"%s\"", version); + else + appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION); + + if (encoding && encoding != PG_UTF8) + { + /* + * XXX might be useful to convert this to IANA names (ISO-8859-1 + * instead of LATIN1 etc.); needs field experience + */ + appendStringInfo(buf, " encoding=\"%s\"", + pg_encoding_to_char(encoding)); + } + + if (standalone == 1) + appendStringInfoString(buf, " standalone=\"yes\""); + else if (standalone == 0) + appendStringInfoString(buf, " standalone=\"no\""); + appendStringInfoString(buf, "?>"); + + return true; + } + else + return false; +} + +/* + * Test whether an input that is to be parsed as CONTENT contains a DTD. + * + * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not + * satisfied by a document with a DTD, which is a bit of a wart, as it means + * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and + * later fix that, by redefining content with reference to the "more + * permissive" Document Node of the XQuery/XPath Data Model, such that any + * DOCUMENT value is indeed also a CONTENT value. That definition is more + * useful, as CONTENT becomes usable for parsing input of unknown form (think + * pg_restore). + * + * As used below in parse_xml when parsing for CONTENT, libxml does not give + * us the 2006+ behavior, but only the 2003; it will choke if the input has + * a DTD. But we can provide the 2006+ definition of CONTENT easily enough, + * by detecting this case first and simply doing the parse as DOCUMENT. 
+ * + * A DTD can be found arbitrarily far in, but that would be a contrived case; + * it will ordinarily start within a few dozen characters. The only things + * that can precede it are an XMLDecl (here, the caller will have called + * parse_xml_decl already), whitespace, comments, and processing instructions. + * This function need only return true if it sees a valid sequence of such + * things leading to <!DOCTYPE. It can simply return false in any other + * cases, including malformed input; that will mean the input gets parsed as + * CONTENT as originally planned, with libxml reporting any errors. + * + * This is only to be called from xml_parse, when pg_xml_init has already + * been called. The input is already in UTF8 encoding. + */ +static bool +xml_doctype_in_content(const xmlChar *str) +{ + const xmlChar *p = str; + + for (;;) + { + const xmlChar *e; + + SKIP_XML_SPACE(p); + if (*p != '<') + return false; + p++; + + if (*p == '!') + { + p++; + + /* if we see <!DOCTYPE, we can return true */ + if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0) + return true; + + /* otherwise, if it's not a comment, fail */ + if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0) + return false; + /* find end of comment: find -- and a > must follow */ + p = xmlStrstr(p + 2, (xmlChar *) "--"); + if (!p || p[2] != '>') + return false; + /* advance over comment, and keep scanning */ + p += 3; + continue; + } + + /* otherwise, if it's not a PI <?target something?>, fail */ + if (*p != '?') + return false; + p++; + + /* find end of PI (the string ?> is forbidden within a PI) */ + e = xmlStrstr(p, (xmlChar *) "?>"); + if (!e) + return false; + + /* advance over PI, keep scanning */ + p = e + 2; + } +} + + +/* + * Convert a C string to XML internal representation + * + * Note: it is caller's responsibility to xmlFreeDoc() the result, + * else a permanent memory leak will ensue! + * + * TODO maybe libxml2's xmlreader is better? 
(do not construct DOM, + * yet do not use SAX - see xmlreader.c) + */ +static xmlDocPtr +xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, + int encoding) +{ + int32 len; + xmlChar *string; + xmlChar *utf8string; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + + len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ + string = xml_text2xmlChar(data); + + utf8string = pg_do_encoding_conversion(string, + len, + encoding, + PG_UTF8); + + /* Start up libxml and its parser */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED); + + /* Use a TRY block to ensure we clean up correctly */ + PG_TRY(); + { + bool parse_as_document = false; + int res_code; + size_t count = 0; + xmlChar *version = NULL; + int standalone = 0; + + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + + /* Decide whether to parse as document or content */ + if (xmloption_arg == XMLOPTION_DOCUMENT) + parse_as_document = true; + else + { + /* Parse and skip over the XML declaration, if any */ + res_code = parse_xml_decl(utf8string, + &count, &version, NULL, &standalone); + if (res_code != 0) + xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, + "invalid XML content: invalid XML declaration", + res_code); + + /* Is there a DOCTYPE element? */ + if (xml_doctype_in_content(utf8string + count)) + parse_as_document = true; + } + + if (parse_as_document) + { + /* + * Note, that here we try to apply DTD defaults + * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d: + * 'Default values defined by internal DTD are applied'. As for + * external DTDs, we try to support them too, (see SQL/XML:2008 GR + * 10.16.7.e) + */ + doc = xmlCtxtReadDoc(ctxt, utf8string, + NULL, + "UTF-8", + XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 
0 : XML_PARSE_NOBLANKS)); + if (doc == NULL || xmlerrcxt->err_occurred) + { + /* Use original option to decide which error code to throw */ + if (xmloption_arg == XMLOPTION_DOCUMENT) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "invalid XML document"); + else + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, + "invalid XML content"); + } + } + else + { + doc = xmlNewDoc(version); + Assert(doc->encoding == NULL); + doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); + doc->standalone = standalone; + + /* allow empty content */ + if (*(utf8string + count)) + { + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, NULL); + if (res_code != 0 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, + "invalid XML content"); + } + } + } + PG_CATCH(); + { + if (doc != NULL) + xmlFreeDoc(doc); + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); + + return doc; +} + + +/* + * xmlChar<->text conversions + */ +static xmlChar * +xml_text2xmlChar(text *in) +{ + return (xmlChar *) text_to_cstring(in); +} + + +#ifdef USE_LIBXMLCONTEXT + +/* + * Manage the special context used for all libxml allocations (but only + * in special debug builds; see notes at top of file) + */ +static void +xml_memory_init(void) +{ + /* Create memory context if not there already */ + if (LibxmlContext == NULL) + LibxmlContext = AllocSetContextCreate(TopMemoryContext, + "Libxml context", + ALLOCSET_DEFAULT_SIZES); + + /* Re-establish the callbacks even if already set */ + xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); +} + +/* + * Wrappers for memory management functions + */ +static void * +xml_palloc(size_t size) +{ + return MemoryContextAlloc(LibxmlContext, size); +} + + +static void * +xml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + + 
+static void +xml_pfree(void *ptr) +{ + /* At least some parts of libxml assume xmlFree(NULL) is allowed */ + if (ptr) + pfree(ptr); +} + + +static char * +xml_pstrdup(const char *string) +{ + return MemoryContextStrdup(LibxmlContext, string); +} +#endif /* USE_LIBXMLCONTEXT */ + + +/* + * xmlPgEntityLoader --- entity loader callback function + * + * Silently prevent any external entity URL from being loaded. We don't want + * to throw an error, so instead make the entity appear to expand to an empty + * string. + * + * We would prefer to allow loading entities that exist in the system's + * global XML catalog; but the available libxml2 APIs make that a complex + * and fragile task. For now, just shut down all external access. + */ +static xmlParserInputPtr +xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) +{ + return xmlNewStringInputStream(ctxt, (const xmlChar *) ""); +} + + +/* + * xml_ereport --- report an XML-related error + * + * The "msg" is the SQL-level message; some can be adopted from the SQL/XML + * standard. This function adds libxml's native error message, if any, as + * detail. + * + * This is exported for modules that want to share the core libxml error + * handler. Note that pg_xml_init() *must* have been called previously. + */ +void +xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) +{ + char *detail; + + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; + else + detail = NULL; + + ereport(level, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? 
errdetail_internal("%s", detail) : 0)); +} + + +/* + * Error handler for libxml errors and warnings + */ +static void +xml_errorHandler(void *data, xmlErrorPtr error) +{ + PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt; + xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL; + xmlNodePtr node = error->node; + const xmlChar *name = (node != NULL && + node->type == XML_ELEMENT_NODE) ? node->name : NULL; + int domain = error->domain; + int level = error->level; + StringInfo errorBuf; + + /* + * Defend against someone passing us a bogus context struct. + * + * We force a backend exit if this check fails because longjmp'ing out of + * libxml would likely render it unsafe to use further. + */ + if (xmlerrcxt->magic != ERRCXT_MAGIC) + elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext"); + + /*---------- + * Older libxml versions report some errors differently. + * First, some errors were previously reported as coming from the parser + * domain but are now reported as coming from the namespace domain. + * Second, some warnings were upgraded to errors. + * We attempt to compensate for that here. + *---------- + */ + switch (error->code) + { + case XML_WAR_NS_URI: + level = XML_ERR_ERROR; + domain = XML_FROM_NAMESPACE; + break; + + case XML_ERR_NS_DECL_ERROR: + case XML_WAR_NS_URI_RELATIVE: + case XML_WAR_NS_COLUMN: + case XML_NS_ERR_XML_NAMESPACE: + case XML_NS_ERR_UNDEFINED_NAMESPACE: + case XML_NS_ERR_QNAME: + case XML_NS_ERR_ATTRIBUTE_REDEFINED: + case XML_NS_ERR_EMPTY: + domain = XML_FROM_NAMESPACE; + break; + } + + /* Decide whether to act on the error or not */ + switch (domain) + { + case XML_FROM_PARSER: + case XML_FROM_NONE: + case XML_FROM_MEMORY: + case XML_FROM_IO: + + /* + * Suppress warnings about undeclared entities. We need to do + * this to avoid problems due to not loading DTD definitions. 
+ */ + if (error->code == XML_WAR_UNDECLARED_ENTITY) + return; + + /* Otherwise, accept error regardless of the parsing purpose */ + break; + + default: + /* Ignore error if only doing well-formedness check */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED) + return; + break; + } + + /* Prepare error message in errorBuf */ + errorBuf = makeStringInfo(); + + if (error->line > 0) + appendStringInfo(errorBuf, "line %d: ", error->line); + if (name != NULL) + appendStringInfo(errorBuf, "element %s: ", name); + if (error->message != NULL) + appendStringInfoString(errorBuf, error->message); + else + appendStringInfoString(errorBuf, "(no message provided)"); + + /* + * Append context information to errorBuf. + * + * xmlParserPrintFileContext() uses libxml's "generic" error handler to + * write the context. Since we don't want to duplicate libxml + * functionality here, we set up a generic error handler temporarily. + * + * We use appendStringInfo() directly as libxml's generic error handler. + * This should work because it has essentially the same signature as + * libxml expects, namely (void *ptr, const char *msg, ...). + */ + if (input != NULL) + { + xmlGenericErrorFunc errFuncSaved = xmlGenericError; + void *errCtxSaved = xmlGenericErrorContext; + + xmlSetGenericErrorFunc((void *) errorBuf, + (xmlGenericErrorFunc) appendStringInfo); + + /* Add context information to errorBuf */ + appendStringInfoLineSeparator(errorBuf); + + xmlParserPrintFileContext(input); + + /* Restore generic error func */ + xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved); + } + + /* Get rid of any trailing newlines in errorBuf */ + chopStringInfoNewlines(errorBuf); + + /* + * Legacy error handling mode. err_occurred is never set, we just add the + * message to err_buf. This mode exists because the xml2 contrib module + * uses our error-handling infrastructure, but we don't want to change its + * behaviour since it's deprecated anyway. 
This is also why we don't + * distinguish between notices, warnings and errors here --- the old-style + * generic error handler wouldn't have done that either. + */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, + errorBuf->len); + + pfree(errorBuf->data); + pfree(errorBuf); + return; + } + + /* + * We don't want to ereport() here because that'd probably leave libxml in + * an inconsistent state. Instead, we remember the error and ereport() + * from xml_ereport(). + * + * Warnings and notices can be reported immediately since they won't cause + * a longjmp() out of libxml. + */ + if (level >= XML_ERR_ERROR) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, + errorBuf->len); + + xmlerrcxt->err_occurred = true; + } + else if (level >= XML_ERR_WARNING) + { + ereport(WARNING, + (errmsg_internal("%s", errorBuf->data))); + } + else + { + ereport(NOTICE, + (errmsg_internal("%s", errorBuf->data))); + } + + pfree(errorBuf->data); + pfree(errorBuf); +} + + +/* + * Wrapper for "ereport" function for XML-related errors. The "msg" + * is the SQL-level message; some can be adopted from the SQL/XML + * standard. This function uses "code" to create a textual detail + * message. At the moment, we only need to cover those codes that we + * may raise in this file. 
+ */ +static void +xml_ereport_by_code(int level, int sqlcode, + const char *msg, int code) +{ + const char *det; + + switch (code) + { + case XML_ERR_INVALID_CHAR: + det = gettext_noop("Invalid character value."); + break; + case XML_ERR_SPACE_REQUIRED: + det = gettext_noop("Space required."); + break; + case XML_ERR_STANDALONE_VALUE: + det = gettext_noop("standalone accepts only 'yes' or 'no'."); + break; + case XML_ERR_VERSION_MISSING: + det = gettext_noop("Malformed declaration: missing version."); + break; + case XML_ERR_MISSING_ENCODING: + det = gettext_noop("Missing encoding in text declaration."); + break; + case XML_ERR_XMLDECL_NOT_FINISHED: + det = gettext_noop("Parsing XML declaration: '?>' expected."); + break; + default: + det = gettext_noop("Unrecognized libxml error code: %d."); + break; + } + + ereport(level, + (errcode(sqlcode), + errmsg_internal("%s", msg), + errdetail(det, code))); +} + + +/* + * Remove all trailing newlines from a StringInfo string + */ +static void +chopStringInfoNewlines(StringInfo str) +{ + while (str->len > 0 && str->data[str->len - 1] == '\n') + str->data[--str->len] = '\0'; +} + + +/* + * Append a newline after removing any existing trailing newlines + */ +static void +appendStringInfoLineSeparator(StringInfo str) +{ + chopStringInfoNewlines(str); + if (str->len > 0) + appendStringInfoChar(str, '\n'); +} + + +/* + * Convert one char in the current server encoding to a Unicode codepoint. 
+ */ +static pg_wchar +sqlchar_to_unicode(const char *s) +{ + char *utf8string; + pg_wchar ret[2]; /* need space for trailing zero */ + + /* note we're not assuming s is null-terminated */ + utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8); + + pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, + pg_encoding_mblen(PG_UTF8, utf8string)); + + if (utf8string != s) + pfree(utf8string); + + return ret[0]; +} + + +static bool +is_valid_xml_namefirst(pg_wchar c) +{ + /* (Letter | '_' | ':') */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || c == '_' || c == ':'); +} + + +static bool +is_valid_xml_namechar(pg_wchar c) +{ + /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || xmlIsDigitQ(c) + || c == '.' || c == '-' || c == '_' || c == ':' + || xmlIsCombiningQ(c) + || xmlIsExtenderQ(c)); +} +#endif /* USE_LIBXML */ + + +/* + * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1. + */ +char * +map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, + bool escape_period) +{ +#ifdef USE_LIBXML + StringInfoData buf; + const char *p; + + /* + * SQL/XML doesn't make use of this case anywhere, so it's probably a + * mistake. + */ + Assert(fully_escaped || !escape_period); + + initStringInfo(&buf); + + for (p = ident; *p; p += pg_mblen(p)) + { + if (*p == ':' && (p == ident || fully_escaped)) + appendStringInfoString(&buf, "_x003A_"); + else if (*p == '_' && *(p + 1) == 'x') + appendStringInfoString(&buf, "_x005F_"); + else if (fully_escaped && p == ident && + pg_strncasecmp(p, "xml", 3) == 0) + { + if (*p == 'x') + appendStringInfoString(&buf, "_x0078_"); + else + appendStringInfoString(&buf, "_x0058_"); + } + else if (escape_period && *p == '.') + appendStringInfoString(&buf, "_x002E_"); + else + { + pg_wchar u = sqlchar_to_unicode(p); + + if ((p == ident) + ? 
!is_valid_xml_namefirst(u) + : !is_valid_xml_namechar(u)) + appendStringInfo(&buf, "_x%04X_", (unsigned int) u); + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + } + + return buf.data; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return NULL; +#endif /* not USE_LIBXML */ +} + + +/* + * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3. + */ +char * +map_xml_name_to_sql_identifier(const char *name) +{ + StringInfoData buf; + const char *p; + + initStringInfo(&buf); + + for (p = name; *p; p += pg_mblen(p)) + { + if (*p == '_' && *(p + 1) == 'x' + && isxdigit((unsigned char) *(p + 2)) + && isxdigit((unsigned char) *(p + 3)) + && isxdigit((unsigned char) *(p + 4)) + && isxdigit((unsigned char) *(p + 5)) + && *(p + 6) == '_') + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + unsigned int u; + + sscanf(p + 2, "%X", &u); + pg_unicode_to_server(u, (unsigned char *) cbuf); + appendStringInfoString(&buf, cbuf); + p += 6; + } + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + + return buf.data; +} + +/* + * Map SQL value to XML value; see SQL/XML:2008 section 9.8. + * + * When xml_escape_strings is true, then certain characters in string + * values are replaced by entity references (< etc.), as specified + * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is + * wanted. The false case is mainly useful when the resulting value + * is used with xmlTextWriterWriteAttribute() to write out an + * attribute, because that function does the escaping itself. 
+ */ +char * +map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) +{ + if (type_is_array_domain(type)) + { + ArrayType *array; + Oid elmtype; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + StringInfoData buf; + int i; + + array = DatumGetArrayTypeP(value); + elmtype = ARR_ELEMTYPE(array); + get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign); + + deconstruct_array(array, elmtype, + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, + &num_elems); + + initStringInfo(&buf); + + for (i = 0; i < num_elems; i++) + { + if (elem_nulls[i]) + continue; + appendStringInfoString(&buf, "<element>"); + appendStringInfoString(&buf, + map_sql_value_to_xml_value(elem_values[i], + elmtype, true)); + appendStringInfoString(&buf, "</element>"); + } + + pfree(elem_values); + pfree(elem_nulls); + + return buf.data; + } + else + { + Oid typeOut; + bool isvarlena; + char *str; + + /* + * Flatten domains; the special-case treatments below should apply to, + * eg, domains over boolean not just boolean. 
+ */ + type = getBaseType(type); + + /* + * Special XSD formatting for some data types + */ + switch (type) + { + case BOOLOID: + if (DatumGetBool(value)) + return "true"; + else + return "false"; + + case DATEOID: + { + DateADT date; + struct pg_tm tm; + char buf[MAXDATELEN + 1]; + + date = DatumGetDateADT(value); + /* XSD doesn't support infinite values */ + if (DATE_NOT_FINITE(date)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"), + errdetail("XML does not support infinite date values."))); + j2date(date + POSTGRES_EPOCH_JDATE, + &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday)); + EncodeDateOnly(&tm, USE_XSD_DATES, buf); + + return pstrdup(buf); + } + + case TIMESTAMPOID: + { + Timestamp timestamp; + struct pg_tm tm; + fsec_t fsec; + char buf[MAXDATELEN + 1]; + + timestamp = DatumGetTimestamp(value); + + /* XSD doesn't support infinite values */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"), + errdetail("XML does not support infinite timestamp values."))); + else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0) + EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + return pstrdup(buf); + } + + case TIMESTAMPTZOID: + { + TimestampTz timestamp; + struct pg_tm tm; + int tz; + fsec_t fsec; + const char *tzn = NULL; + char buf[MAXDATELEN + 1]; + + timestamp = DatumGetTimestamp(value); + + /* XSD doesn't support infinite values */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"), + errdetail("XML does not support infinite timestamp values."))); + else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0) + EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf); + else + ereport(ERROR, + 
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + return pstrdup(buf); + } + +#ifdef USE_LIBXML + case BYTEAOID: + { + bytea *bstr = DatumGetByteaPP(value); + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; + char *result; + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlTextWriter"); + + if (xmlbinary == XMLBINARY_BASE64) + xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr), + 0, VARSIZE_ANY_EXHDR(bstr)); + else + xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr), + 0, VARSIZE_ANY_EXHDR(bstr)); + + /* we MUST do this now to flush data out to the buffer */ + xmlFreeTextWriter(writer); + writer = NULL; + + result = pstrdup((const char *) xmlBufferContent(buf)); + } + PG_CATCH(); + { + if (writer) + xmlFreeTextWriter(writer); + if (buf) + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, false); + + return result; + } +#endif /* USE_LIBXML */ + + } + + /* + * otherwise, just use the type's native text representation + */ + getTypeOutputInfo(type, &typeOut, &isvarlena); + str = OidOutputFunctionCall(typeOut, value); + + /* ... exactly as-is for XML, and when escaping is not wanted */ + if (type == XMLOID || !xml_escape_strings) + return str; + + /* otherwise, translate special characters as needed */ + return escape_xml(str); + } +} + + +/* + * Escape characters in text that have special meanings in XML. + * + * Returns a palloc'd string. + * + * NB: this is intentionally not dependent on libxml. 
+ */ +char * +escape_xml(const char *str) +{ + StringInfoData buf; + const char *p; + + initStringInfo(&buf); + for (p = str; *p; p++) + { + switch (*p) + { + case '&': + appendStringInfoString(&buf, "&"); + break; + case '<': + appendStringInfoString(&buf, "<"); + break; + case '>': + appendStringInfoString(&buf, ">"); + break; + case '\r': + appendStringInfoString(&buf, "
"); + break; + default: + appendStringInfoCharMacro(&buf, *p); + break; + } + } + return buf.data; +} + + +static char * +_SPI_strdup(const char *s) +{ + size_t len = strlen(s) + 1; + char *ret = SPI_palloc(len); + + memcpy(ret, s, len); + return ret; +} + + +/* + * SQL to XML mapping functions + * + * What follows below was at one point intentionally organized so that + * you can read along in the SQL/XML standard. The functions are + * mostly split up the way the clauses lay out in the standards + * document, and the identifiers are also aligned with the standard + * text. Unfortunately, SQL/XML:2006 reordered the clauses + * differently than SQL/XML:2003, so the order below doesn't make much + * sense anymore. + * + * There are many things going on there: + * + * There are two kinds of mappings: Mapping SQL data (table contents) + * to XML documents, and mapping SQL structure (the "schema") to XML + * Schema. And there are functions that do both at the same time. + * + * Then you can map a database, a schema, or a table, each in both + * ways. This breaks down recursively: Mapping a database invokes + * mapping schemas, which invokes mapping tables, which invokes + * mapping rows, which invokes mapping columns, although you can't + * call the last two from the outside. Because of this, there are a + * number of xyz_internal() functions which are to be called both from + * the function manager wrapper and from some upper layer in a + * recursive call. + * + * See the documentation about what the common function arguments + * nulls, tableforest, and targetns mean. + * + * Some style guidelines for XML output: Use double quotes for quoting + * XML attributes. Indent XML elements by two spaces, but remember + * that a lot of code is called recursively at different levels, so + * it's better not to indent rather than create output that indents + * and outdents weirdly. Add newlines to make the output look nice. 
+ */ + + +/* + * Visibility of objects for XML mappings; see SQL/XML:2008 section + * 4.10.8. + */ + +/* + * Given a query, which must return type oid as first column, produce + * a list of Oids with the query results. + */ +static List * +query_to_oid_list(const char *query) +{ + uint64 i; + List *list = NIL; + int spi_result; + + spi_result = SPI_execute(query, true, 0); + if (spi_result != SPI_OK_SELECT) + elog(ERROR, "SPI_execute returned %s for %s", + SPI_result_code_string(spi_result), query); + + for (i = 0; i < SPI_processed; i++) + { + Datum oid; + bool isnull; + + oid = SPI_getbinval(SPI_tuptable->vals[i], + SPI_tuptable->tupdesc, + 1, + &isnull); + if (!isnull) + list = lappend_oid(list, DatumGetObjectId(oid)); + } + + return list; +} + + +static List * +schema_get_xml_visible_tables(Oid nspid) +{ + StringInfoData query; + + initStringInfo(&query); + appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class" + " WHERE relnamespace = %u AND relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_VIEW) ")" + " AND pg_catalog.has_table_privilege (oid, 'SELECT')" + " ORDER BY relname;", nspid); + + return query_to_oid_list(query.data); +} + + +/* + * Including the system schemas is probably not useful for a database + * mapping. + */ +#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')" + +#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE + + +static List * +database_get_xml_visible_schemas(void) +{ + return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;"); +} + + +static List * +database_get_xml_visible_tables(void) +{ + /* At the moment there is no order required here. 
*/ + return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class" + " WHERE relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_VIEW) ")" + " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')" + " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");"); +} + + +/* + * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008 + * section 9.11. + */ + +static StringInfo +table_to_xml_internal(Oid relid, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + StringInfoData query; + + initStringInfo(&query); + appendStringInfo(&query, "SELECT * FROM %s", + DatumGetCString(DirectFunctionCall1(regclassout, + ObjectIdGetDatum(relid)))); + return query_to_xml_internal(query.data, get_rel_name(relid), + xmlschema, nulls, tableforest, + targetns, top_level); +} + + +Datum +table_to_xml(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL, + nulls, tableforest, + targetns, true))); +} + + +Datum +query_to_xml(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, + NULL, nulls, tableforest, + targetns, true))); +} + + +Datum +cursor_to_xml(PG_FUNCTION_ARGS) +{ + char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + int32 count = PG_GETARG_INT32(1); + bool nulls = PG_GETARG_BOOL(2); + bool tableforest = PG_GETARG_BOOL(3); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4)); + + StringInfoData result; + Portal portal; + uint64 i; + + initStringInfo(&result); + + if (!tableforest) + { + 
xmldata_root_element_start(&result, "table", NULL, targetns, true); + appendStringInfoChar(&result, '\n'); + } + + SPI_connect(); + portal = SPI_cursor_find(name); + if (portal == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + + SPI_cursor_fetch(portal, true, count); + for (i = 0; i < SPI_processed; i++) + SPI_sql_row_to_xmlelement(i, &result, NULL, nulls, + tableforest, targetns, true); + + SPI_finish(); + + if (!tableforest) + xmldata_root_element_end(&result, "table"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&result)); +} + + +/* + * Write the start tag of the root element of a data mapping. + * + * top_level means that this is the very top level of the eventual + * output. For example, when the user calls table_to_xml, then a call + * with a table name to this function is the top level. When the user + * calls database_to_xml, then a call with a schema name to this + * function is not the top level. If top_level is false, then the XML + * namespace declarations are omitted, because they supposedly already + * appeared earlier in the output. Repeating them is not wrong, but + * it looks ugly. + */ +static void +xmldata_root_element_start(StringInfo result, const char *eltname, + const char *xmlschema, const char *targetns, + bool top_level) +{ + /* This isn't really wrong but currently makes no sense. 
*/ + Assert(top_level || !xmlschema); + + appendStringInfo(result, "<%s", eltname); + if (top_level) + { + appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\""); + if (strlen(targetns) > 0) + appendStringInfo(result, " xmlns=\"%s\"", targetns); + } + if (xmlschema) + { + /* FIXME: better targets */ + if (strlen(targetns) > 0) + appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns); + else + appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\""); + } + appendStringInfoString(result, ">\n"); +} + + +static void +xmldata_root_element_end(StringInfo result, const char *eltname) +{ + appendStringInfo(result, "</%s>\n", eltname); +} + + +static StringInfo +query_to_xml_internal(const char *query, char *tablename, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + StringInfo result; + char *xmltn; + uint64 i; + + if (tablename) + xmltn = map_sql_identifier_to_xml_name(tablename, true, false); + else + xmltn = "table"; + + result = makeStringInfo(); + + SPI_connect(); + if (SPI_execute(query, true, 0) != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid query"))); + + if (!tableforest) + { + xmldata_root_element_start(result, xmltn, xmlschema, + targetns, top_level); + appendStringInfoChar(result, '\n'); + } + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + for (i = 0; i < SPI_processed; i++) + SPI_sql_row_to_xmlelement(i, result, tablename, nulls, + tableforest, targetns, top_level); + + if (!tableforest) + xmldata_root_element_end(result, xmltn); + + SPI_finish(); + + return result; +} + + +Datum +table_to_xmlschema(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *result; + Relation rel; + + rel = table_open(relid, AccessShareLock); + result = 
map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, + tableforest, targetns); + table_close(rel, NoLock); + + PG_RETURN_XML_P(cstring_to_xmltype(result)); +} + + +Datum +query_to_xmlschema(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *result; + SPIPlanPtr plan; + Portal portal; + + SPI_connect(); + + if ((plan = SPI_prepare(query, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, + tableforest, targetns)); + SPI_cursor_close(portal); + SPI_finish(); + + PG_RETURN_XML_P(cstring_to_xmltype(result)); +} + + +Datum +cursor_to_xmlschema(PG_FUNCTION_ARGS) +{ + char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *xmlschema; + Portal portal; + + SPI_connect(); + portal = SPI_cursor_find(name); + if (portal == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + + xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, + tableforest, targetns)); + SPI_finish(); + + PG_RETURN_XML_P(cstring_to_xmltype(xmlschema)); +} + + +Datum +table_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + Relation rel; + const char *xmlschema; + + rel = table_open(relid, AccessShareLock); + xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, + tableforest, targetns); 
+ table_close(rel, NoLock); + + PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, + xmlschema, nulls, tableforest, + targetns, true))); +} + + +Datum +query_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + const char *xmlschema; + SPIPlanPtr plan; + Portal portal; + + SPI_connect(); + + if ((plan = SPI_prepare(query, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, tableforest, targetns)); + SPI_cursor_close(portal); + SPI_finish(); + + PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, + xmlschema, nulls, tableforest, + targetns, true))); +} + + +/* + * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008 + * sections 9.13, 9.14. 
+ */ + +static StringInfo +schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls, + bool tableforest, const char *targetns, bool top_level) +{ + StringInfo result; + char *xmlsn; + List *relid_list; + ListCell *cell; + + xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid), + true, false); + result = makeStringInfo(); + + xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level); + appendStringInfoChar(result, '\n'); + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + SPI_connect(); + + relid_list = schema_get_xml_visible_tables(nspid); + + foreach(cell, relid_list) + { + Oid relid = lfirst_oid(cell); + StringInfo subres; + + subres = table_to_xml_internal(relid, NULL, nulls, tableforest, + targetns, false); + + appendBinaryStringInfo(result, subres->data, subres->len); + appendStringInfoChar(result, '\n'); + } + + SPI_finish(); + + xmldata_root_element_end(result, xmlsn); + + return result; +} + + +Datum +schema_to_xml(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + char *schemaname; + Oid nspid; + + schemaname = NameStr(*name); + nspid = LookupExplicitNamespace(schemaname, false); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL, + nulls, tableforest, targetns, true))); +} + + +/* + * Write the start element of the root element of an XML Schema mapping. 
+ */ +static void +xsd_schema_element_start(StringInfo result, const char *targetns) +{ + appendStringInfoString(result, + "<xsd:schema\n" + " xmlns:xsd=\"" NAMESPACE_XSD "\""); + if (strlen(targetns) > 0) + appendStringInfo(result, + "\n" + " targetNamespace=\"%s\"\n" + " elementFormDefault=\"qualified\"", + targetns); + appendStringInfoString(result, + ">\n\n"); +} + + +static void +xsd_schema_element_end(StringInfo result) +{ + appendStringInfoString(result, "</xsd:schema>"); +} + + +static StringInfo +schema_to_xmlschema_internal(const char *schemaname, bool nulls, + bool tableforest, const char *targetns) +{ + Oid nspid; + List *relid_list; + List *tupdesc_list; + ListCell *cell; + StringInfo result; + + result = makeStringInfo(); + + nspid = LookupExplicitNamespace(schemaname, false); + + xsd_schema_element_start(result, targetns); + + SPI_connect(); + + relid_list = schema_get_xml_visible_tables(nspid); + + tupdesc_list = NIL; + foreach(cell, relid_list) + { + Relation rel; + + rel = table_open(lfirst_oid(cell), AccessShareLock); + tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); + table_close(rel, NoLock); + } + + appendStringInfoString(result, + map_sql_typecoll_to_xmlschema_types(tupdesc_list)); + + appendStringInfoString(result, + map_sql_schema_to_xmlschema_types(nspid, relid_list, + nulls, tableforest, targetns)); + + xsd_schema_element_end(result); + + SPI_finish(); + + return result; +} + + +Datum +schema_to_xmlschema(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name), + nulls, tableforest, targetns))); +} + + +Datum +schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = 
text_to_cstring(PG_GETARG_TEXT_PP(3)); + char *schemaname; + Oid nspid; + StringInfo xmlschema; + + schemaname = NameStr(*name); + nspid = LookupExplicitNamespace(schemaname, false); + + xmlschema = schema_to_xmlschema_internal(schemaname, nulls, + tableforest, targetns); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, + xmlschema->data, nulls, + tableforest, targetns, true))); +} + + +/* + * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008 + * sections 9.16, 9.17. + */ + +static StringInfo +database_to_xml_internal(const char *xmlschema, bool nulls, + bool tableforest, const char *targetns) +{ + StringInfo result; + List *nspid_list; + ListCell *cell; + char *xmlcn; + + xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId), + true, false); + result = makeStringInfo(); + + xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true); + appendStringInfoChar(result, '\n'); + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + SPI_connect(); + + nspid_list = database_get_xml_visible_schemas(); + + foreach(cell, nspid_list) + { + Oid nspid = lfirst_oid(cell); + StringInfo subres; + + subres = schema_to_xml_internal(nspid, NULL, nulls, + tableforest, targetns, false); + + appendBinaryStringInfo(result, subres->data, subres->len); + appendStringInfoChar(result, '\n'); + } + + SPI_finish(); + + xmldata_root_element_end(result, xmlcn); + + return result; +} + + +Datum +database_to_xml(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls, + tableforest, targetns))); +} + + +static StringInfo +database_to_xmlschema_internal(bool nulls, bool tableforest, + const char *targetns) +{ + List *relid_list; + List *nspid_list; + List *tupdesc_list; + ListCell *cell; + StringInfo result; + + result = 
makeStringInfo(); + + xsd_schema_element_start(result, targetns); + + SPI_connect(); + + relid_list = database_get_xml_visible_tables(); + nspid_list = database_get_xml_visible_schemas(); + + tupdesc_list = NIL; + foreach(cell, relid_list) + { + Relation rel; + + rel = table_open(lfirst_oid(cell), AccessShareLock); + tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); + table_close(rel, NoLock); + } + + appendStringInfoString(result, + map_sql_typecoll_to_xmlschema_types(tupdesc_list)); + + appendStringInfoString(result, + map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns)); + + xsd_schema_element_end(result); + + SPI_finish(); + + return result; +} + + +Datum +database_to_xmlschema(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls, + tableforest, targetns))); +} + + +Datum +database_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + StringInfo xmlschema; + + xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data, + nulls, tableforest, targetns))); +} + + +/* + * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section + * 9.2. 
+ */ +static char * +map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d) +{ + StringInfoData result; + + initStringInfo(&result); + + if (a) + appendStringInfoString(&result, + map_sql_identifier_to_xml_name(a, true, true)); + if (b) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(b, true, true)); + if (c) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(c, true, true)); + if (d) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(d, true, true)); + + return result.data; +} + + +/* + * Map an SQL table to an XML Schema document; see SQL/XML:2008 + * section 9.11. + * + * Map an SQL table to XML Schema data types; see SQL/XML:2008 section + * 9.9. + */ +static const char * +map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls, + bool tableforest, const char *targetns) +{ + int i; + char *xmltn; + char *tabletypename; + char *rowtypename; + StringInfoData result; + + initStringInfo(&result); + + if (OidIsValid(relid)) + { + HeapTuple tuple; + Form_pg_class reltuple; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltuple = (Form_pg_class) GETSTRUCT(tuple); + + xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname), + true, false); + + tabletypename = map_multipart_sql_identifier_to_xml_name("TableType", + get_database_name(MyDatabaseId), + get_namespace_name(reltuple->relnamespace), + NameStr(reltuple->relname)); + + rowtypename = map_multipart_sql_identifier_to_xml_name("RowType", + get_database_name(MyDatabaseId), + get_namespace_name(reltuple->relnamespace), + NameStr(reltuple->relname)); + + ReleaseSysCache(tuple); + } + else + { + if (tableforest) + xmltn = "row"; + else + xmltn = "table"; + + tabletypename = "TableType"; + rowtypename = "RowType"; + } + + xsd_schema_element_start(&result, targetns); + + 
appendStringInfoString(&result, + map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc))); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n" + " <xsd:sequence>\n", + rowtypename); + + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n", + map_sql_identifier_to_xml_name(NameStr(att->attname), + true, false), + map_sql_type_to_xml_name(att->atttypid, -1), + nulls ? " nillable=\"true\"" : " minOccurs=\"0\""); + } + + appendStringInfoString(&result, + " </xsd:sequence>\n" + "</xsd:complexType>\n\n"); + + if (!tableforest) + { + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n" + " <xsd:sequence>\n" + " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n" + " </xsd:sequence>\n" + "</xsd:complexType>\n\n", + tabletypename, rowtypename); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmltn, tabletypename); + } + else + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmltn, rowtypename); + + xsd_schema_element_end(&result); + + return result.data; +} + + +/* + * Map an SQL schema to XML Schema data types; see SQL/XML:2008 + * section 9.12. 
+ */ +static const char * +map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls, + bool tableforest, const char *targetns) +{ + char *dbname; + char *nspname; + char *xmlsn; + char *schematypename; + StringInfoData result; + ListCell *cell; + + dbname = get_database_name(MyDatabaseId); + nspname = get_namespace_name(nspid); + + initStringInfo(&result); + + xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); + + schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", + dbname, + nspname, + NULL); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n", schematypename); + if (!tableforest) + appendStringInfoString(&result, + " <xsd:all>\n"); + else + appendStringInfoString(&result, + " <xsd:sequence>\n"); + + foreach(cell, relid_list) + { + Oid relid = lfirst_oid(cell); + char *relname = get_rel_name(relid); + char *xmltn = map_sql_identifier_to_xml_name(relname, true, false); + char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType", + dbname, + nspname, + relname); + + if (!tableforest) + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"/>\n", + xmltn, tabletypename); + else + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n", + xmltn, tabletypename); + } + + if (!tableforest) + appendStringInfoString(&result, + " </xsd:all>\n"); + else + appendStringInfoString(&result, + " </xsd:sequence>\n"); + appendStringInfoString(&result, + "</xsd:complexType>\n\n"); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmlsn, schematypename); + + return result.data; +} + + +/* + * Map an SQL catalog to XML Schema data types; see SQL/XML:2008 + * section 9.15. 
+ */ +static const char * +map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls, + bool tableforest, const char *targetns) +{ + char *dbname; + char *xmlcn; + char *catalogtypename; + StringInfoData result; + ListCell *cell; + + dbname = get_database_name(MyDatabaseId); + + initStringInfo(&result); + + xmlcn = map_sql_identifier_to_xml_name(dbname, true, false); + + catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType", + dbname, + NULL, + NULL); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n", catalogtypename); + appendStringInfoString(&result, + " <xsd:all>\n"); + + foreach(cell, nspid_list) + { + Oid nspid = lfirst_oid(cell); + char *nspname = get_namespace_name(nspid); + char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); + char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", + dbname, + nspname, + NULL); + + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"/>\n", + xmlsn, schematypename); + } + + appendStringInfoString(&result, + " </xsd:all>\n"); + appendStringInfoString(&result, + "</xsd:complexType>\n\n"); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmlcn, catalogtypename); + + return result.data; +} + + +/* + * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4. 
+ */ +static const char * +map_sql_type_to_xml_name(Oid typeoid, int typmod) +{ + StringInfoData result; + + initStringInfo(&result); + + switch (typeoid) + { + case BPCHAROID: + if (typmod == -1) + appendStringInfoString(&result, "CHAR"); + else + appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ); + break; + case VARCHAROID: + if (typmod == -1) + appendStringInfoString(&result, "VARCHAR"); + else + appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ); + break; + case NUMERICOID: + if (typmod == -1) + appendStringInfoString(&result, "NUMERIC"); + else + appendStringInfo(&result, "NUMERIC_%d_%d", + ((typmod - VARHDRSZ) >> 16) & 0xffff, + (typmod - VARHDRSZ) & 0xffff); + break; + case INT4OID: + appendStringInfoString(&result, "INTEGER"); + break; + case INT2OID: + appendStringInfoString(&result, "SMALLINT"); + break; + case INT8OID: + appendStringInfoString(&result, "BIGINT"); + break; + case FLOAT4OID: + appendStringInfoString(&result, "REAL"); + break; + case FLOAT8OID: + appendStringInfoString(&result, "DOUBLE"); + break; + case BOOLOID: + appendStringInfoString(&result, "BOOLEAN"); + break; + case TIMEOID: + if (typmod == -1) + appendStringInfoString(&result, "TIME"); + else + appendStringInfo(&result, "TIME_%d", typmod); + break; + case TIMETZOID: + if (typmod == -1) + appendStringInfoString(&result, "TIME_WTZ"); + else + appendStringInfo(&result, "TIME_WTZ_%d", typmod); + break; + case TIMESTAMPOID: + if (typmod == -1) + appendStringInfoString(&result, "TIMESTAMP"); + else + appendStringInfo(&result, "TIMESTAMP_%d", typmod); + break; + case TIMESTAMPTZOID: + if (typmod == -1) + appendStringInfoString(&result, "TIMESTAMP_WTZ"); + else + appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod); + break; + case DATEOID: + appendStringInfoString(&result, "DATE"); + break; + case XMLOID: + appendStringInfoString(&result, "XML"); + break; + default: + { + HeapTuple tuple; + Form_pg_type typtuple; + + tuple = SearchSysCache1(TYPEOID, 
ObjectIdGetDatum(typeoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for type %u", typeoid); + typtuple = (Form_pg_type) GETSTRUCT(tuple); + + appendStringInfoString(&result, + map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT", + get_database_name(MyDatabaseId), + get_namespace_name(typtuple->typnamespace), + NameStr(typtuple->typname))); + + ReleaseSysCache(tuple); + } + } + + return result.data; +} + + +/* + * Map a collection of SQL data types to XML Schema data types; see + * SQL/XML:2008 section 9.7. + */ +static const char * +map_sql_typecoll_to_xmlschema_types(List *tupdesc_list) +{ + List *uniquetypes = NIL; + int i; + StringInfoData result; + ListCell *cell0; + + /* extract all column types used in the set of TupleDescs */ + foreach(cell0, tupdesc_list) + { + TupleDesc tupdesc = (TupleDesc) lfirst(cell0); + + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid); + } + } + + /* add base types of domains */ + foreach(cell0, uniquetypes) + { + Oid typid = lfirst_oid(cell0); + Oid basetypid = getBaseType(typid); + + if (basetypid != typid) + uniquetypes = list_append_unique_oid(uniquetypes, basetypid); + } + + /* Convert to textual form */ + initStringInfo(&result); + + foreach(cell0, uniquetypes) + { + appendStringInfo(&result, "%s\n", + map_sql_type_to_xmlschema_type(lfirst_oid(cell0), + -1)); + } + + return result.data; +} + + +/* + * Map an SQL data type to a named XML Schema data type; see + * SQL/XML:2008 sections 9.5 and 9.6. + * + * (The distinction between 9.5 and 9.6 is basically that 9.6 adds + * a name attribute, which this function does. The name-less version + * 9.5 doesn't appear to be required anywhere.) 
+ */ +static const char * +map_sql_type_to_xmlschema_type(Oid typeoid, int typmod) +{ + StringInfoData result; + const char *typename = map_sql_type_to_xml_name(typeoid, typmod); + + initStringInfo(&result); + + if (typeoid == XMLOID) + { + appendStringInfoString(&result, + "<xsd:complexType mixed=\"true\">\n" + " <xsd:sequence>\n" + " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n" + " </xsd:sequence>\n" + "</xsd:complexType>\n"); + } + else + { + appendStringInfo(&result, + "<xsd:simpleType name=\"%s\">\n", typename); + + switch (typeoid) + { + case BPCHAROID: + case VARCHAROID: + case TEXTOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:string\">\n"); + if (typmod != -1) + appendStringInfo(&result, + " <xsd:maxLength value=\"%d\"/>\n", + typmod - VARHDRSZ); + appendStringInfoString(&result, " </xsd:restriction>\n"); + break; + + case BYTEAOID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:%s\">\n" + " </xsd:restriction>\n", + xmlbinary == XMLBINARY_BASE64 ? 
"base64Binary" : "hexBinary"); + break; + + case NUMERICOID: + if (typmod != -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:decimal\">\n" + " <xsd:totalDigits value=\"%d\"/>\n" + " <xsd:fractionDigits value=\"%d\"/>\n" + " </xsd:restriction>\n", + ((typmod - VARHDRSZ) >> 16) & 0xffff, + (typmod - VARHDRSZ) & 0xffff); + break; + + case INT2OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:short\">\n" + " <xsd:maxInclusive value=\"%d\"/>\n" + " <xsd:minInclusive value=\"%d\"/>\n" + " </xsd:restriction>\n", + SHRT_MAX, SHRT_MIN); + break; + + case INT4OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:int\">\n" + " <xsd:maxInclusive value=\"%d\"/>\n" + " <xsd:minInclusive value=\"%d\"/>\n" + " </xsd:restriction>\n", + INT_MAX, INT_MIN); + break; + + case INT8OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:long\">\n" + " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n" + " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n" + " </xsd:restriction>\n", + (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1, + (((uint64) 1) << (sizeof(int64) * 8 - 1))); + break; + + case FLOAT4OID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n"); + break; + + case FLOAT8OID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n"); + break; + + case BOOLOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n"); + break; + + case TIMEOID: + case TIMETZOID: + { + const char *tz = (typeoid == TIMETZOID ? 
"(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); + + if (typmod == -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" + " </xsd:restriction>\n", tz); + else if (typmod == 0) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" + " </xsd:restriction>\n", tz); + else + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" + " </xsd:restriction>\n", typmod - VARHDRSZ, tz); + break; + } + + case TIMESTAMPOID: + case TIMESTAMPTZOID: + { + const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); + + if (typmod == -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" + " </xsd:restriction>\n", tz); + else if (typmod == 0) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" + " </xsd:restriction>\n", tz); + else + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" + " </xsd:restriction>\n", typmod - VARHDRSZ, tz); + break; + } + + case DATEOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:date\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n" + " </xsd:restriction>\n"); + break; + + default: + if (get_typtype(typeoid) == TYPTYPE_DOMAIN) + { + Oid base_typeoid; + int32 base_typmod = -1; + + base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod); + + appendStringInfo(&result, + " <xsd:restriction base=\"%s\"/>\n", + 
map_sql_type_to_xml_name(base_typeoid, base_typmod)); + } + break; + } + appendStringInfoString(&result, "</xsd:simpleType>\n"); + } + + return result.data; +} + + +/* + * Map an SQL row to an XML element, taking the row from the active + * SPI cursor. See also SQL/XML:2008 section 9.10. + */ +static void +SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename, + bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + int i; + char *xmltn; + + if (tablename) + xmltn = map_sql_identifier_to_xml_name(tablename, true, false); + else + { + if (tableforest) + xmltn = "row"; + else + xmltn = "table"; + } + + if (tableforest) + xmldata_root_element_start(result, xmltn, NULL, targetns, top_level); + else + appendStringInfoString(result, "<row>\n"); + + for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++) + { + char *colname; + Datum colval; + bool isnull; + + colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i), + true, false); + colval = SPI_getbinval(SPI_tuptable->vals[rownum], + SPI_tuptable->tupdesc, + i, + &isnull); + if (isnull) + { + if (nulls) + appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname); + } + else + appendStringInfo(result, " <%s>%s</%s>\n", + colname, + map_sql_value_to_xml_value(colval, + SPI_gettypeid(SPI_tuptable->tupdesc, i), true), + colname); + } + + if (tableforest) + { + xmldata_root_element_end(result, xmltn); + appendStringInfoChar(result, '\n'); + } + else + appendStringInfoString(result, "</row>\n\n"); +} + + +/* + * XPath related functions + */ + +#ifdef USE_LIBXML + +/* + * Convert XML node to text. + * + * For attribute and text nodes, return the escaped text. For anything else, + * dump the whole subtree. 
+ */ +static text * +xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) +{ + xmltype *result = NULL; + + if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE) + { + void (*volatile nodefree) (xmlNodePtr) = NULL; + volatile xmlBufferPtr buf = NULL; + volatile xmlNodePtr cur_copy = NULL; + + PG_TRY(); + { + int bytes; + + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + + /* + * Produce a dump of the node that we can serialize. xmlNodeDump + * does that, but the result of that function won't contain + * namespace definitions from ancestor nodes, so we first do a + * xmlCopyNode() which duplicates the node along with its required + * namespace definitions. + * + * Some old libxml2 versions such as 2.7.6 produce partially + * broken XML_DOCUMENT_NODE nodes (unset content field) when + * copying them. xmlNodeDump of such a node works fine, but + * xmlFreeNode crashes; set us up to call xmlFreeDoc instead. + */ + cur_copy = xmlCopyNode(cur, 1); + if (cur_copy == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not copy node"); + nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ? 
+ (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode; + + bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0); + if (bytes == -1 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not dump node"); + + result = xmlBuffer_to_xmltype(buf); + } + PG_FINALLY(); + { + if (nodefree) + nodefree(cur_copy); + if (buf) + xmlBufferFree(buf); + } + PG_END_TRY(); + } + else + { + xmlChar *str; + + str = xmlXPathCastNodeToString(cur); + PG_TRY(); + { + /* Here we rely on XML having the same representation as TEXT */ + char *escaped = escape_xml((char *) str); + + result = (xmltype *) cstring_to_text(escaped); + pfree(escaped); + } + PG_FINALLY(); + { + xmlFree(str); + } + PG_END_TRY(); + } + + return result; +} + +/* + * Convert an XML XPath object (the result of evaluating an XPath expression) + * to an array of xml values, which are appended to astate. The function + * result value is the number of elements in the array. + * + * If "astate" is NULL then we don't generate the array value, but we still + * return the number of elements it would have had. + * + * Nodesets are converted to an array containing the nodes' textual + * representations. Primitive values (float, double, string) are converted + * to a single-element array containing the value's string representation. 
+ */ +static int +xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, + ArrayBuildState *astate, + PgXmlErrorContext *xmlerrcxt) +{ + int result = 0; + Datum datum; + Oid datumtype; + char *result_str; + + switch (xpathobj->type) + { + case XPATH_NODESET: + if (xpathobj->nodesetval != NULL) + { + result = xpathobj->nodesetval->nodeNr; + if (astate != NULL) + { + int i; + + for (i = 0; i < result; i++) + { + datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], + xmlerrcxt)); + (void) accumArrayResult(astate, datum, false, + XMLOID, CurrentMemoryContext); + } + } + } + return result; + + case XPATH_BOOLEAN: + if (astate == NULL) + return 1; + datum = BoolGetDatum(xpathobj->boolval); + datumtype = BOOLOID; + break; + + case XPATH_NUMBER: + if (astate == NULL) + return 1; + datum = Float8GetDatum(xpathobj->floatval); + datumtype = FLOAT8OID; + break; + + case XPATH_STRING: + if (astate == NULL) + return 1; + datum = CStringGetDatum((char *) xpathobj->stringval); + datumtype = CSTRINGOID; + break; + + default: + elog(ERROR, "xpath expression result type %d is unsupported", + xpathobj->type); + return 0; /* keep compiler quiet */ + } + + /* Common code for scalar-value cases */ + result_str = map_sql_value_to_xml_value(datum, datumtype, true); + datum = PointerGetDatum(cstring_to_xmltype(result_str)); + (void) accumArrayResult(astate, datum, false, + XMLOID, CurrentMemoryContext); + return 1; +} + + +/* + * Common code for xpath() and xmlexists() + * + * Evaluate XPath expression and return number of nodes in res_nitems + * and array of XML values in astate. Either of those pointers can be + * NULL if the corresponding result isn't wanted. + * + * It is up to the user to ensure that the XML passed is in fact + * an XML document - XPath doesn't work easily on fragments without + * a context node being known. 
+ */ +static void +xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, + int *res_nitems, ArrayBuildState *astate) +{ + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathctx = NULL; + volatile xmlXPathCompExprPtr xpathcomp = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; + char *datastr; + int32 len; + int32 xpath_len; + xmlChar *string; + xmlChar *xpath_expr; + size_t xmldecl_len = 0; + int i; + int ndim; + Datum *ns_names_uris; + bool *ns_names_uris_nulls; + int ns_count; + + /* + * Namespace mappings are passed as text[]. If an empty array is passed + * (ndim = 0, "0-dimensional"), then there are no namespace mappings. + * Else, a 2-dimensional array with length of the second axis being equal + * to 2 should be passed, i.e., every subarray contains 2 elements, the + * first element defining the name, the second one the URI. Example: + * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', + * 'http://example2.com']]. + */ + ndim = namespaces ? 
ARR_NDIM(namespaces) : 0; + if (ndim != 0) + { + int *dims; + + dims = ARR_DIMS(namespaces); + + if (ndim != 2 || dims[1] != 2) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid array for XML namespace mapping"), + errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); + + Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); + + deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT, + &ns_names_uris, &ns_names_uris_nulls, + &ns_count); + + Assert((ns_count % 2) == 0); /* checked above */ + ns_count /= 2; /* count pairs only */ + } + else + { + ns_names_uris = NULL; + ns_names_uris_nulls = NULL; + ns_count = 0; + } + + datastr = VARDATA(data); + len = VARSIZE(data) - VARHDRSZ; + xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text); + if (xpath_len == 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("empty XPath expression"))); + + string = pg_xmlCharStrndup(datastr, len); + xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len); + + /* + * In a UTF8 database, skip any xml declaration, which might assert + * another encoding. Ignore parse_xml_decl() failure, letting + * xmlCtxtReadMemory() report parse errors. Documentation disclaims + * xpath() support for non-ASCII data in non-UTF8 databases, so leave + * those scenarios bug-compatible with historical behavior. 
+ */ + if (GetDatabaseEncoding() == PG_UTF8) + parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL); + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlInitParser(); + + /* + * redundant XML parsing (two parsings for the same value during one + * command execution are possible) + */ + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, + len - xmldecl_len, NULL, NULL, 0); + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathctx = xmlXPathNewContext(doc); + if (xpathctx == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathctx->node = (xmlNodePtr) doc; + + /* register namespaces, if any */ + if (ns_count > 0) + { + for (i = 0; i < ns_count; i++) + { + char *ns_name; + char *ns_uri; + + if (ns_names_uris_nulls[i * 2] || + ns_names_uris_nulls[i * 2 + 1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("neither namespace name nor URI may be null"))); + ns_name = TextDatumGetCString(ns_names_uris[i * 2]); + ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); + if (xmlXPathRegisterNs(xpathctx, + (xmlChar *) ns_name, + (xmlChar *) ns_uri) != 0) + ereport(ERROR, /* is this an internal error??? 
*/ + (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", + ns_name, ns_uri))); + } + } + + xpathcomp = xmlXPathCompile(xpath_expr); + if (xpathcomp == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "invalid XPath expression"); + + /* + * Version 2.6.27 introduces a function named + * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists, + * but we can derive the existence by whether any nodes are returned, + * thereby preventing a library version upgrade and keeping the code + * the same. + */ + xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); + if (xpathobj == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + /* + * Extract the results as requested. + */ + if (res_nitems != NULL) + *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); + else + (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); + } + PG_CATCH(); + { + if (xpathobj) + xmlXPathFreeObject(xpathobj); + if (xpathcomp) + xmlXPathFreeCompExpr(xpathcomp); + if (xpathctx) + xmlXPathFreeContext(xpathctx); + if (doc) + xmlFreeDoc(doc); + if (ctxt) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlXPathFreeObject(xpathobj); + xmlXPathFreeCompExpr(xpathcomp); + xmlXPathFreeContext(xpathctx); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); +} +#endif /* USE_LIBXML */ + +/* + * Evaluate XPath expression and return array of XML values. + * + * As we have no support of XQuery sequences yet, this function seems + * to be the most useful one (array of XML functions plays a role of + * some kind of substitution for XQuery sequences). 
+ */ +Datum +xpath(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + ArrayBuildState *astate; + + astate = initArrayResult(XMLOID, CurrentMemoryContext, true); + xpath_internal(xpath_expr_text, data, namespaces, + NULL, astate); + PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. + */ +Datum +xmlexists(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + int res_nitems; + + xpath_internal(xpath_expr_text, data, NULL, + &res_nitems, NULL); + + PG_RETURN_BOOL(res_nitems > 0); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. Differs from + * xmlexists as it supports namespaces and is not defined in SQL/XML. 
+ */ +Datum +xpath_exists(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + int res_nitems; + + xpath_internal(xpath_expr_text, data, namespaces, + &res_nitems, NULL); + + PG_RETURN_BOOL(res_nitems > 0); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Functions for checking well-formed-ness + */ + +#ifdef USE_LIBXML +static bool +wellformed_xml(text *data, XmlOptionType xmloption_arg) +{ + bool result; + volatile xmlDocPtr doc = NULL; + + /* We want to catch any exceptions and return false */ + PG_TRY(); + { + doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding()); + result = true; + } + PG_CATCH(); + { + FlushErrorState(); + result = false; + } + PG_END_TRY(); + + if (doc) + xmlFreeDoc(doc); + + return result; +} +#endif + +Datum +xml_is_well_formed(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, xmloption)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_document(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_content(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +/* + * support functions for XMLTABLE + * + */ +#ifdef USE_LIBXML + +/* + * Returns private data from executor state. Ensure validity by check with + * MAGIC number. 
+ */ +static inline XmlTableBuilderData * +GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname) +{ + XmlTableBuilderData *result; + + if (!IsA(state, TableFuncScanState)) + elog(ERROR, "%s called with invalid TableFuncScanState", fname); + result = (XmlTableBuilderData *) state->opaque; + if (result->magic != XMLTABLE_CONTEXT_MAGIC) + elog(ERROR, "%s called with invalid TableFuncScanState", fname); + + return result; +} +#endif + +/* + * XmlTableInitOpaque + * Fill in TableFuncScanState->opaque for XmlTable processor; initialize + * the XML parser. + * + * Note: Because we call pg_xml_init() here and pg_xml_done() in + * XmlTableDestroyOpaque, it is critical for robustness that no other + * executor nodes run until this node is processed to completion. Caller + * must execute this to completion (probably filling a tuplestore to exhaust + * this node in a single pass) instead of using row-per-call mode. + */ +static void +XmlTableInitOpaque(TableFuncScanState *state, int natts) +{ +#ifdef USE_LIBXML + volatile xmlParserCtxtPtr ctxt = NULL; + XmlTableBuilderData *xtCxt; + PgXmlErrorContext *xmlerrcxt; + + xtCxt = palloc0(sizeof(XmlTableBuilderData)); + xtCxt->magic = XMLTABLE_CONTEXT_MAGIC; + xtCxt->natts = natts; + xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts); + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + } + PG_CATCH(); + { + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xtCxt->xmlerrcxt = xmlerrcxt; + xtCxt->ctxt = ctxt; + + state->opaque = xtCxt; +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetDocument + * Install the input document + */ +static void +XmlTableSetDocument(TableFuncScanState *state, 
Datum value) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmltype *xmlval = DatumGetXmlP(value); + char *str; + xmlChar *xstr; + int length; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathcxt = NULL; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument"); + + /* + * Use out function for casting to string (remove encoding property). See + * comment in xml_out. + */ + str = xml_out_internal(xmlval, 0); + + length = strlen(str); + xstr = pg_xmlCharStrndup(str, length); + + PG_TRY(); + { + doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0); + if (doc == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathcxt = xmlXPathNewContext(doc); + if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathcxt->node = (xmlNodePtr) doc; + } + PG_CATCH(); + { + if (xpathcxt != NULL) + xmlXPathFreeContext(xpathcxt); + if (doc != NULL) + xmlFreeDoc(doc); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xtCxt->doc = doc; + xtCxt->xpathcxt = xpathcxt; +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetNamespace + * Add a namespace declaration + */ +static void +XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + if (name == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DEFAULT namespace is not supported"))); + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace"); + + if (xmlXPathRegisterNs(xtCxt->xpathcxt, + pg_xmlCharStrndup(name, strlen(name)), + pg_xmlCharStrndup(uri, strlen(uri)))) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, + "could not set XML namespace"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * 
XmlTableSetRowFilter + * Install the row-filter Xpath expression. + */ +static void +XmlTableSetRowFilter(TableFuncScanState *state, const char *path) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmlChar *xstr; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter"); + + if (*path == '\0') + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("row path filter must not be empty string"))); + + xstr = pg_xmlCharStrndup(path, strlen(path)); + + xtCxt->xpathcomp = xmlXPathCompile(xstr); + if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR, + "invalid XPath expression"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetColumnFilter + * Install the column-filter Xpath expression, for the given column. + */ +static void +XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmlChar *xstr; + + AssertArg(PointerIsValid(path)); + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter"); + + if (*path == '\0') + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("column path filter must not be empty string"))); + + xstr = pg_xmlCharStrndup(path, strlen(path)); + + xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr); + if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, + "invalid XPath expression"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableFetchRow + * Prepare the next "current" tuple for upcoming GetValue calls. + * Returns false if the row-filter expression returned no more rows. 
+ */ +static bool +XmlTableFetchRow(TableFuncScanState *state) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow"); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + if (xtCxt->xpathobj == NULL) + { + xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt); + if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + xtCxt->row_count = 0; + } + + if (xtCxt->xpathobj->type == XPATH_NODESET) + { + if (xtCxt->xpathobj->nodesetval != NULL) + { + if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr) + return true; + } + } + + return false; +#else + NO_XML_SUPPORT(); + return false; +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableGetValue + * Return the value for column number 'colnum' for the current row. If + * column -1 is requested, return representation of the whole row. + * + * This leaks memory, so be sure to reset often the context in which it's + * called. 
+ */ +static Datum +XmlTableGetValue(TableFuncScanState *state, int colnum, + Oid typid, int32 typmod, bool *isnull) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + Datum result = (Datum) 0; + xmlNodePtr cur; + char *cstr = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue"); + + Assert(xtCxt->xpathobj && + xtCxt->xpathobj->type == XPATH_NODESET && + xtCxt->xpathobj->nodesetval != NULL); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + *isnull = false; + + cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1]; + + Assert(xtCxt->xpathscomp[colnum] != NULL); + + PG_TRY(); + { + /* Set current node as entry point for XPath evaluation */ + xtCxt->xpathcxt->node = cur; + + /* Evaluate column path */ + xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt); + if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + /* + * There are four possible cases, depending on the number of nodes + * returned by the XPath expression and the type of the target column: + * a) XPath returns no nodes. b) The target type is XML (return all + * as XML). For non-XML return types: c) One node (return content). + * d) Multiple nodes (error). 
+ */ + if (xpathobj->type == XPATH_NODESET) + { + int count = 0; + + if (xpathobj->nodesetval != NULL) + count = xpathobj->nodesetval->nodeNr; + + if (xpathobj->nodesetval == NULL || count == 0) + { + *isnull = true; + } + else + { + if (typid == XMLOID) + { + text *textstr; + StringInfoData str; + + /* Concatenate serialized values */ + initStringInfo(&str); + for (int i = 0; i < count; i++) + { + textstr = + xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], + xtCxt->xmlerrcxt); + + appendStringInfoText(&str, textstr); + } + cstr = str.data; + } + else + { + xmlChar *str; + + if (count > 1) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("more than one value returned by column XPath expression"))); + + str = xmlXPathCastNodeSetToString(xpathobj->nodesetval); + cstr = str ? xml_pstrdup_and_free(str) : ""; + } + } + } + else if (xpathobj->type == XPATH_STRING) + { + /* Content should be escaped when target will be XML */ + if (typid == XMLOID) + cstr = escape_xml((char *) xpathobj->stringval); + else + cstr = (char *) xpathobj->stringval; + } + else if (xpathobj->type == XPATH_BOOLEAN) + { + char typcategory; + bool typispreferred; + xmlChar *str; + + /* Allow implicit casting from boolean to numbers */ + get_type_category_preferred(typid, &typcategory, &typispreferred); + + if (typcategory != TYPCATEGORY_NUMERIC) + str = xmlXPathCastBooleanToString(xpathobj->boolval); + else + str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval)); + + cstr = xml_pstrdup_and_free(str); + } + else if (xpathobj->type == XPATH_NUMBER) + { + xmlChar *str; + + str = xmlXPathCastNumberToString(xpathobj->floatval); + cstr = xml_pstrdup_and_free(str); + } + else + elog(ERROR, "unexpected XPath object type %u", xpathobj->type); + + /* + * By here, either cstr contains the result value, or the isnull flag + * has been set. 
+ */ + Assert(cstr || *isnull); + + if (!*isnull) + result = InputFunctionCall(&state->in_functions[colnum], + cstr, + state->typioparams[colnum], + typmod); + } + PG_FINALLY(); + { + if (xpathobj != NULL) + xmlXPathFreeObject(xpathobj); + } + PG_END_TRY(); + + return result; +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableDestroyOpaque + * Release all libxml2 resources + */ +static void +XmlTableDestroyOpaque(TableFuncScanState *state) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque"); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + if (xtCxt->xpathscomp != NULL) + { + int i; + + for (i = 0; i < xtCxt->natts; i++) + if (xtCxt->xpathscomp[i] != NULL) + xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]); + } + + if (xtCxt->xpathobj != NULL) + xmlXPathFreeObject(xtCxt->xpathobj); + if (xtCxt->xpathcomp != NULL) + xmlXPathFreeCompExpr(xtCxt->xpathcomp); + if (xtCxt->xpathcxt != NULL) + xmlXPathFreeContext(xtCxt->xpathcxt); + if (xtCxt->doc != NULL) + xmlFreeDoc(xtCxt->doc); + if (xtCxt->ctxt != NULL) + xmlFreeParserCtxt(xtCxt->ctxt); + + pg_xml_done(xtCxt->xmlerrcxt, true); + + /* not valid anymore */ + xtCxt->magic = 0; + state->opaque = NULL; + +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} |