author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:41:08 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:41:08 +0000
commit    | 506ed8899b3a97e512be3fd6d44d5b11463bf9bf (patch)
tree      | 808913770c5e6935d3714058c2a066c57b4632ec /psycopg_c
parent    | Initial commit. (diff)
Adding upstream version 3.1.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'psycopg_c')
37 files changed, 6876 insertions, 0 deletions
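Both README files imported below describe ``psycopg_c`` and ``psycopg_binary`` as drop-in accelerators that psycopg picks up automatically at import time. Which implementation actually got loaded can be checked from Python — a small sketch, assuming a standard psycopg 3 install::

    import psycopg

    # "python" (the ctypes wrapper), "c" (psycopg[c], compiled locally) or
    # "binary" (the precompiled psycopg[binary] wheel)
    print(psycopg.pq.__impl__)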
diff --git a/psycopg_c/.flake8 b/psycopg_c/.flake8 new file mode 100644 index 0000000..2ae629c --- /dev/null +++ b/psycopg_c/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 88 +ignore = W503, E203 diff --git a/psycopg_c/LICENSE.txt b/psycopg_c/LICENSE.txt new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/psycopg_c/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. 
Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/psycopg_c/README-binary.rst b/psycopg_c/README-binary.rst
new file mode 100644
index 0000000..9318d57
--- /dev/null
+++ b/psycopg_c/README-binary.rst
@@ -0,0 +1,29 @@
+Psycopg 3: PostgreSQL database adapter for Python - binary package
+==================================================================
+
+This distribution contains the precompiled optimization package
+``psycopg_binary``.
+
+You shouldn't install this package directly: use instead ::
+
+    pip install "psycopg[binary]"
+
+to install a version of the optimization package matching the ``psycopg``
+version installed.
+
+Installing this package requires pip >= 20.3.
+
+This package is not available for every platform: check out `Binary
+installation`__ in the documentation.
+
+.. __: https://www.psycopg.org/psycopg3/docs/basic/install.html
+       #binary-installation
+
+Please read `the project readme`__ and `the installation documentation`__ for
+more details.
+
+.. __: https://github.com/psycopg/psycopg#readme
+.. __: https://www.psycopg.org/psycopg3/docs/basic/install.html
+
+
+Copyright (C) 2020 The Psycopg Team
diff --git a/psycopg_c/README.rst b/psycopg_c/README.rst
new file mode 100644
index 0000000..de9ba93
--- /dev/null
+++ b/psycopg_c/README.rst
@@ -0,0 +1,33 @@
+Psycopg 3: PostgreSQL database adapter for Python - optimisation package
+========================================================================
+
+This distribution contains the optional optimization package ``psycopg_c``.
+
+You shouldn't install this package directly: use instead ::
+
+    pip install "psycopg[c]"
+
+to install a version of the optimization package matching the ``psycopg``
+version installed.
+
+Installing this package requires some prerequisites: check `Local
+installation`__ in the documentation. Without a C compiler and some library
+headers the install *will fail*: this is not a bug.
+
+If you are unable to meet the prerequisites you might want to install
+``psycopg[binary]`` instead: look for `Binary installation`__ in the
+documentation.
+
+.. __: https://www.psycopg.org/psycopg3/docs/basic/install.html
+       #local-installation
+.. __: https://www.psycopg.org/psycopg3/docs/basic/install.html
+       #binary-installation
+
+Please read `the project readme`__ and `the installation documentation`__ for
+more details.
+
+.. __: https://github.com/psycopg/psycopg#readme
+.. __: https://www.psycopg.org/psycopg3/docs/basic/install.html
+
+
+Copyright (C) 2020 The Psycopg Team
diff --git a/psycopg_c/psycopg_c/.gitignore b/psycopg_c/psycopg_c/.gitignore
new file mode 100644
index 0000000..36edb64
--- /dev/null
+++ b/psycopg_c/psycopg_c/.gitignore
@@ -0,0 +1,4 @@
+/*.so
+_psycopg.c
+pq.c
+*.html
diff --git a/psycopg_c/psycopg_c/__init__.py b/psycopg_c/psycopg_c/__init__.py
new file mode 100644
index 0000000..14db92b
--- /dev/null
+++ b/psycopg_c/psycopg_c/__init__.py
@@ -0,0 +1,14 @@
+"""
+psycopg -- PostgreSQL database adapter for Python -- C optimization package
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+import sys
+
+# This package shouldn't be imported before psycopg itself, or weird things
+# will happen
+if "psycopg" not in sys.modules:
+    raise ImportError("the psycopg package should be imported before psycopg_c")
+
+from .version import __version__ as __version__  # noqa
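The guard in ``__init__.py`` above means the C package cannot be imported on its own. A minimal sketch of the intended import order (the error text is the one hard-coded above; nothing else is assumed)::

    import sys

    import psycopg      # pulls in psycopg_c itself when the extension is installed
    assert "psycopg" in sys.modules

    import psycopg_c    # now safe: the guard finds "psycopg" already loaded
    # In a fresh interpreter, ``import psycopg_c`` alone would instead raise:
    # ImportError: the psycopg package should be imported before psycopg_c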
diff --git a/psycopg_c/psycopg_c/_psycopg.pyi b/psycopg_c/psycopg_c/_psycopg.pyi
new file mode 100644
index 0000000..bd7c63d
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg.pyi
@@ -0,0 +1,84 @@
+"""
+Stub representation of the public objects exposed by the _psycopg module.
+
+TODO: this should be generated by mypy's stubgen but it crashes with no
+information. Will submit a bug.
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+from typing import Any, Iterable, List, Optional, Sequence, Tuple
+
+from psycopg import pq
+from psycopg import abc
+from psycopg.rows import Row, RowMaker
+from psycopg.adapt import AdaptersMap, PyFormat
+from psycopg.pq.abc import PGconn, PGresult
+from psycopg.connection import BaseConnection
+from psycopg._compat import Deque
+
+class Transformer(abc.AdaptContext):
+    types: Optional[Tuple[int, ...]]
+    formats: Optional[List[pq.Format]]
+    def __init__(self, context: Optional[abc.AdaptContext] = None): ...
+    @classmethod
+    def from_context(cls, context: Optional[abc.AdaptContext]) -> "Transformer": ...
+    @property
+    def connection(self) -> Optional[BaseConnection[Any]]: ...
+    @property
+    def encoding(self) -> str: ...
+    @property
+    def adapters(self) -> AdaptersMap: ...
+    @property
+    def pgresult(self) -> Optional[PGresult]: ...
+    def set_pgresult(
+        self,
+        result: Optional["PGresult"],
+        *,
+        set_loaders: bool = True,
+        format: Optional[pq.Format] = None,
+    ) -> None: ...
+    def set_dumper_types(self, types: Sequence[int], format: pq.Format) -> None: ...
+    def set_loader_types(self, types: Sequence[int], format: pq.Format) -> None: ...
+    def dump_sequence(
+        self, params: Sequence[Any], formats: Sequence[PyFormat]
+    ) -> Sequence[Optional[abc.Buffer]]: ...
+    def as_literal(self, obj: Any) -> bytes: ...
+    def get_dumper(self, obj: Any, format: PyFormat) -> abc.Dumper: ...
+    def load_rows(self, row0: int, row1: int, make_row: RowMaker[Row]) -> List[Row]: ...
+    def load_row(self, row: int, make_row: RowMaker[Row]) -> Optional[Row]: ...
+    def load_sequence(
+        self, record: Sequence[Optional[abc.Buffer]]
+    ) -> Tuple[Any, ...]: ...
+    def get_loader(self, oid: int, format: pq.Format) -> abc.Loader: ...
+
+# Generators
+def connect(conninfo: str) -> abc.PQGenConn[PGconn]: ...
+def execute(pgconn: PGconn) -> abc.PQGen[List[PGresult]]: ...
+def send(pgconn: PGconn) -> abc.PQGen[None]: ...
+def fetch_many(pgconn: PGconn) -> abc.PQGen[List[PGresult]]: ...
+def fetch(pgconn: PGconn) -> abc.PQGen[Optional[PGresult]]: ...
+def pipeline_communicate(
+    pgconn: PGconn, commands: Deque[abc.PipelineCommand]
+) -> abc.PQGen[List[List[PGresult]]]: ...
+def wait_c(
+    gen: abc.PQGen[abc.RV], fileno: int, timeout: Optional[float] = None
+) -> abc.RV: ...
+
+# Copy support
+def format_row_text(
+    row: Sequence[Any], tx: abc.Transformer, out: Optional[bytearray] = None
+) -> bytearray: ...
+def format_row_binary(
+    row: Sequence[Any], tx: abc.Transformer, out: Optional[bytearray] = None
+) -> bytearray: ...
+def parse_row_text(data: abc.Buffer, tx: abc.Transformer) -> Tuple[Any, ...]: ...
+def parse_row_binary(data: abc.Buffer, tx: abc.Transformer) -> Tuple[Any, ...]: ...
+
+# Arrays optimization
+def array_load_text(
+    data: abc.Buffer, loader: abc.Loader, delimiter: bytes = b","
+) -> List[Any]: ...
+def array_load_binary(data: abc.Buffer, tx: abc.Transformer) -> List[Any]: ...
+
+# vim: set syntax=python:
diff --git a/psycopg_c/psycopg_c/_psycopg.pyx b/psycopg_c/psycopg_c/_psycopg.pyx
new file mode 100644
index 0000000..9d2b8ba
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg.pyx
@@ -0,0 +1,48 @@
+"""
+psycopg_c._psycopg optimization module.
+
+The module contains optimized C code used in preference to Python code
+if a compiler is available.
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+from psycopg_c cimport pq
+from psycopg_c.pq cimport libpq
+from psycopg_c._psycopg cimport oids
+
+import logging
+
+from psycopg.pq import Format as _pq_Format
+from psycopg._enums import PyFormat as _py_Format
+
+logger = logging.getLogger("psycopg")
+
+PQ_TEXT = _pq_Format.TEXT
+PQ_BINARY = _pq_Format.BINARY
+
+PG_AUTO = _py_Format.AUTO
+PG_TEXT = _py_Format.TEXT
+PG_BINARY = _py_Format.BINARY
+
+
+cdef extern from *:
+    """
+#ifndef ARRAYSIZE
+#define ARRAYSIZE(a) ((sizeof(a) / sizeof(*(a))))
+#endif
+    """
+    int ARRAYSIZE(void *array)
+
+
+include "_psycopg/adapt.pyx"
+include "_psycopg/copy.pyx"
+include "_psycopg/generators.pyx"
+include "_psycopg/transform.pyx"
+include "_psycopg/waiting.pyx"
+
+include "types/array.pyx"
+include "types/datetime.pyx"
+include "types/numeric.pyx"
+include "types/bool.pyx"
+include "types/string.pyx"
diff --git a/psycopg_c/psycopg_c/_psycopg/__init__.pxd b/psycopg_c/psycopg_c/_psycopg/__init__.pxd
new file mode 100644
index 0000000..db22deb
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg/__init__.pxd
@@ -0,0 +1,9 @@
+"""
+psycopg_c._psycopg cython module.
+
+This file is necessary to allow c-importing pxd files from this directory.
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+from psycopg_c._psycopg cimport oids
diff --git a/psycopg_c/psycopg_c/_psycopg/adapt.pyx b/psycopg_c/psycopg_c/_psycopg/adapt.pyx
new file mode 100644
index 0000000..a6d8e6a
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg/adapt.pyx
@@ -0,0 +1,171 @@
+"""
+C implementation of the adaptation system.
+
+This module maps each Python adaptation function to a C adaptation function.
+Notice that C adaptation functions have a different signature because they can
+avoid making a memory copy; however, this makes it impossible to expose them
+to Python.
+
+This module exposes facilities to map the builtin adapters in Python to
+equivalent C implementations.
+
+"""
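At the Python level, the interface this C layer accelerates is the ``Dumper`` protocol: a per-type object whose ``dump()`` returns the bytes to send for a value. A minimal sketch of a pure-Python dumper, for contrast with the ``cdump()`` signature defined below (the ``Decimal``-to-text mapping is illustrative, not code from this module; psycopg already ships an equivalent built-in dumper)::

    from decimal import Decimal

    import psycopg
    from psycopg.adapt import Dumper

    class DecimalTextDumper(Dumper):
        # Target type OID, looked up in the runtime types registry.
        oid = psycopg.postgres.types["numeric"].oid

        def dump(self, obj: Decimal) -> bytes:
            # A Python dumper allocates and returns a new bytes object;
            # CDumper.cdump() below writes into a caller-provided bytearray
            # instead, avoiding the extra copy.
            return str(obj).encode()

    psycopg.adapters.register_dumper(Decimal, DecimalTextDumper)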
+
+# Copyright (C) 2020 The Psycopg Team
+
+from typing import Any
+
+cimport cython
+
+from libc.string cimport memcpy, memchr
+from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize
+from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_AS_STRING
+
+from psycopg_c.pq cimport _buffer_as_string_and_size, Escaping
+
+from psycopg import errors as e
+from psycopg.pq.misc import error_message
+
+
+@cython.freelist(8)
+cdef class CDumper:
+
+    cdef readonly object cls
+    cdef pq.PGconn _pgconn
+
+    oid = oids.INVALID_OID
+
+    def __cinit__(self, cls, context: Optional[AdaptContext] = None):
+        self.cls = cls
+        conn = context.connection if context is not None else None
+        self._pgconn = conn.pgconn if conn is not None else None
+
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        """Store the Postgres representation of *obj* into *rv* at *offset*
+
+        Return the number of bytes written to rv or -1 on Python exception.
+
+        Subclasses must implement this method. The `dump()` implementation
+        transforms the result of this method to a bytearray so that it can be
+        returned to Python.
+
+        The function interface allows C code to use this method automatically
+        to create larger buffers, e.g. for copy, composite objects, etc.
+
+        Implementation note: as you will always need to make sure that rv
+        has enough space to include what you want to dump, `ensure_size()`
+        will probably come in handy.
+        """
+        raise NotImplementedError()
+
+    def dump(self, obj):
+        """Return the Postgres representation of *obj* as a Python array of bytes"""
+        cdef rv = PyByteArray_FromStringAndSize("", 0)
+        cdef Py_ssize_t length = self.cdump(obj, rv, 0)
+        PyByteArray_Resize(rv, length)
+        return rv
+
+    def quote(self, obj):
+        cdef char *ptr
+        cdef char *ptr_out
+        cdef Py_ssize_t length
+
+        value = self.dump(obj)
+
+        if self._pgconn is not None:
+            esc = Escaping(self._pgconn)
+            # escaping and quoting
+            return esc.escape_literal(value)
+
+        # This path is taken when quote is asked without a connection,
+        # usually it means it was called by psycopg.sql.quote() or by
+        # 'Composable.as_string(None)'. More often than not this is done by
+        # someone generating a SQL file to consume elsewhere.
+
+        rv = PyByteArray_FromStringAndSize("", 0)
+
+        # No quoting: only quote escaping and, if needed, backslash escaping.
+        # See further below.
+        esc = Escaping()
+        out = esc.escape_string(value)
+
+        _buffer_as_string_and_size(out, &ptr, &length)
+
+        if not memchr(ptr, b'\\', length):
+            # If the string has no backslash, the result is correct and we
+            # don't need to bother with standard_conforming_strings.
+            PyByteArray_Resize(rv, length + 2)  # Must include the quotes
+            ptr_out = PyByteArray_AS_STRING(rv)
+            ptr_out[0] = b"'"
+            memcpy(ptr_out + 1, ptr, length)
+            ptr_out[length + 1] = b"'"
+            return rv
+
+        # The libpq has a crazy behaviour: PQescapeString uses the last
+        # standard_conforming_strings setting seen on a connection. This
+        # means that backslashes might be escaped or might not.
+        #
+        # A syntax E'\\' works everywhere, whereas E'\' is an error. OTOH,
+        # if scs is off, '\\' raises a warning and '\' is an error.
+        #
+        # Check what the libpq does, and if it doesn't escape the backslash
+        # let's do it on our own. Never mind the race condition.
+        PyByteArray_Resize(rv, length + 4)  # Must include " E'...'" quotes
+        ptr_out = PyByteArray_AS_STRING(rv)
+        ptr_out[0] = b" "
+        ptr_out[1] = b"E"
+        ptr_out[2] = b"'"
+        memcpy(ptr_out + 3, ptr, length)
+        ptr_out[length + 3] = b"'"
+
+        if esc.escape_string(b"\\") == b"\\":
+            rv = bytes(rv).replace(b"\\", b"\\\\")
+        return rv
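In pure Python, the no-connection branch of ``quote()`` above amounts to the following logic — a simplified sketch of the behaviour the comments describe, using only ``psycopg.pq.Escaping``, not this module's code::

    from psycopg.pq import Escaping

    def quote_without_connection(value: bytes) -> bytes:
        esc = Escaping()  # no connection: escaping rules are a guess
        out = bytes(esc.escape_string(value))
        if b"\\" not in out:
            # No backslash: plain quotes are safe regardless of the
            # server's standard_conforming_strings setting.
            return b"'" + out + b"'"
        # Backslashes present: the E'...' form is valid either way, but the
        # backslashes must be doubled by hand if the libpq did not do it.
        if esc.escape_string(b"\\") == b"\\":
            out = out.replace(b"\\", b"\\\\")
        return b" E'" + out + b"'"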
+
+    cpdef object get_key(self, object obj, object format):
+        return self.cls
+
+    cpdef object upgrade(self, object obj, object format):
+        return self
+
+    @staticmethod
+    cdef char *ensure_size(bytearray ba, Py_ssize_t offset, Py_ssize_t size) except NULL:
+        """
+        Grow *ba*, if necessary, to contain at least *size* bytes after *offset*
+
+        Return the pointer in the bytearray at *offset*, i.e. the place where
+        you want to write *size* bytes.
+        """
+        cdef Py_ssize_t curr_size = PyByteArray_GET_SIZE(ba)
+        cdef Py_ssize_t new_size = offset + size
+        if curr_size < new_size:
+            PyByteArray_Resize(ba, new_size)
+
+        return PyByteArray_AS_STRING(ba) + offset
+
+
+@cython.freelist(8)
+cdef class CLoader:
+    cdef public libpq.Oid oid
+    cdef pq.PGconn _pgconn
+
+    def __cinit__(self, int oid, context: Optional[AdaptContext] = None):
+        self.oid = oid
+        conn = context.connection if context is not None else None
+        self._pgconn = conn.pgconn if conn is not None else None
+
+    cdef object cload(self, const char *data, size_t length):
+        raise NotImplementedError()
+
+    def load(self, object data) -> Any:
+        cdef char *ptr
+        cdef Py_ssize_t length
+        _buffer_as_string_and_size(data, &ptr, &length)
+        return self.cload(ptr, length)
+
+
+cdef class _CRecursiveLoader(CLoader):
+
+    cdef Transformer _tx
+
+    def __cinit__(self, oid: int, context: Optional[AdaptContext] = None):
+        self._tx = Transformer.from_context(context)
diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx
new file mode 100644
index 0000000..b943095
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx
@@ -0,0 +1,340 @@
+"""
+C optimised functions for the copy system.
+ +""" + +# Copyright (C) 2020 The Psycopg Team + +from libc.string cimport memcpy +from libc.stdint cimport uint16_t, uint32_t, int32_t +from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize +from cpython.bytearray cimport PyByteArray_AS_STRING, PyByteArray_GET_SIZE +from cpython.memoryview cimport PyMemoryView_FromObject + +from psycopg_c._psycopg cimport endian +from psycopg_c.pq cimport ViewBuffer + +from psycopg import errors as e + +cdef int32_t _binary_null = -1 + + +def format_row_binary( + row: Sequence[Any], tx: Transformer, out: bytearray = None +) -> bytearray: + """Convert a row of adapted data to the data to send for binary copy""" + cdef Py_ssize_t rowlen = len(row) + cdef uint16_t berowlen = endian.htobe16(<int16_t>rowlen) + + cdef Py_ssize_t pos # offset in 'out' where to write + if out is None: + out = PyByteArray_FromStringAndSize("", 0) + pos = 0 + else: + pos = PyByteArray_GET_SIZE(out) + + # let's start from a nice chunk + # (larger than most fixed size; for variable ones, oh well, we'll resize it) + cdef char *target = CDumper.ensure_size( + out, pos, sizeof(berowlen) + 20 * rowlen) + + # Write the number of fields as network-order 16 bits + memcpy(target, <void *>&berowlen, sizeof(berowlen)) + pos += sizeof(berowlen) + + cdef Py_ssize_t size + cdef uint32_t besize + cdef char *buf + cdef int i + cdef PyObject *fmt = <PyObject *>PG_BINARY + cdef PyObject *row_dumper + + if not tx._row_dumpers: + tx._row_dumpers = PyList_New(rowlen) + + dumpers = tx._row_dumpers + + for i in range(rowlen): + item = row[i] + if item is None: + target = CDumper.ensure_size(out, pos, sizeof(_binary_null)) + memcpy(target, <void *>&_binary_null, sizeof(_binary_null)) + pos += sizeof(_binary_null) + continue + + row_dumper = PyList_GET_ITEM(dumpers, i) + if not row_dumper: + row_dumper = tx.get_row_dumper(<PyObject *>item, fmt) + Py_INCREF(<object>row_dumper) + PyList_SET_ITEM(dumpers, i, <object>row_dumper) + + if (<RowDumper>row_dumper).cdumper is not None: + # A cdumper can resize if necessary and copy in place + size = (<RowDumper>row_dumper).cdumper.cdump( + item, out, pos + sizeof(besize)) + # Also add the size of the item, before the item + besize = endian.htobe32(<int32_t>size) + target = PyByteArray_AS_STRING(out) # might have been moved by cdump + memcpy(target + pos, <void *>&besize, sizeof(besize)) + else: + # A Python dumper, gotta call it and extract its juices + b = PyObject_CallFunctionObjArgs( + (<RowDumper>row_dumper).dumpfunc, <PyObject *>item, NULL) + _buffer_as_string_and_size(b, &buf, &size) + target = CDumper.ensure_size(out, pos, size + sizeof(besize)) + besize = endian.htobe32(<int32_t>size) + memcpy(target, <void *>&besize, sizeof(besize)) + memcpy(target + sizeof(besize), buf, size) + + pos += size + sizeof(besize) + + # Resize to the final size + PyByteArray_Resize(out, pos) + return out + + +def format_row_text( + row: Sequence[Any], tx: Transformer, out: bytearray = None +) -> bytearray: + cdef Py_ssize_t pos # offset in 'out' where to write + if out is None: + out = PyByteArray_FromStringAndSize("", 0) + pos = 0 + else: + pos = PyByteArray_GET_SIZE(out) + + cdef Py_ssize_t rowlen = len(row) + + if rowlen == 0: + PyByteArray_Resize(out, pos + 1) + out[pos] = b"\n" + return out + + cdef Py_ssize_t size, tmpsize + cdef char *buf + cdef int i, j + cdef unsigned char *target + cdef int nesc = 0 + cdef int with_tab + cdef PyObject *fmt = <PyObject *>PG_TEXT + cdef PyObject *row_dumper + + for i in range(rowlen): + # Include the tab before 
the data, so it gets included in the resizes + with_tab = i > 0 + + item = row[i] + if item is None: + if with_tab: + target = <unsigned char *>CDumper.ensure_size(out, pos, 3) + memcpy(target, b"\t\\N", 3) + pos += 3 + else: + target = <unsigned char *>CDumper.ensure_size(out, pos, 2) + memcpy(target, b"\\N", 2) + pos += 2 + continue + + row_dumper = tx.get_row_dumper(<PyObject *>item, fmt) + if (<RowDumper>row_dumper).cdumper is not None: + # A cdumper can resize if necessary and copy in place + size = (<RowDumper>row_dumper).cdumper.cdump( + item, out, pos + with_tab) + target = <unsigned char *>PyByteArray_AS_STRING(out) + pos + else: + # A Python dumper, gotta call it and extract its juices + b = PyObject_CallFunctionObjArgs( + (<RowDumper>row_dumper).dumpfunc, <PyObject *>item, NULL) + _buffer_as_string_and_size(b, &buf, &size) + target = <unsigned char *>CDumper.ensure_size(out, pos, size + with_tab) + memcpy(target + with_tab, buf, size) + + # Prepend a tab to the data just written + if with_tab: + target[0] = b"\t" + target += 1 + pos += 1 + + # Now from pos to pos + size there is a textual representation: it may + # contain chars to escape. Scan to find how many such chars there are. + for j in range(size): + if copy_escape_lut[target[j]]: + nesc += 1 + + # If there is any char to escape, walk backwards pushing the chars + # forward and interspersing backslashes. + if nesc > 0: + tmpsize = size + nesc + target = <unsigned char *>CDumper.ensure_size(out, pos, tmpsize) + for j in range(<int>size - 1, -1, -1): + if copy_escape_lut[target[j]]: + target[j + nesc] = copy_escape_lut[target[j]] + nesc -= 1 + target[j + nesc] = b"\\" + if nesc <= 0: + break + else: + target[j + nesc] = target[j] + pos += tmpsize + else: + pos += size + + # Resize to the final size, add the newline + PyByteArray_Resize(out, pos + 1) + out[pos] = b"\n" + return out + + +def parse_row_binary(data, tx: Transformer) -> Tuple[Any, ...]: + cdef unsigned char *ptr + cdef Py_ssize_t bufsize + _buffer_as_string_and_size(data, <char **>&ptr, &bufsize) + cdef unsigned char *bufend = ptr + bufsize + + cdef uint16_t benfields = (<uint16_t *>ptr)[0] + cdef int nfields = endian.be16toh(benfields) + ptr += sizeof(benfields) + cdef list row = PyList_New(nfields) + + cdef int col + cdef int32_t belength + cdef Py_ssize_t length + + for col in range(nfields): + memcpy(&belength, ptr, sizeof(belength)) + ptr += sizeof(belength) + if belength == _binary_null: + field = None + else: + length = endian.be32toh(belength) + if ptr + length > bufend: + raise e.DataError("bad copy data: length exceeding data") + field = PyMemoryView_FromObject( + ViewBuffer._from_buffer(data, ptr, length)) + ptr += length + + Py_INCREF(field) + PyList_SET_ITEM(row, col, field) + + return tx.load_sequence(row) + + +def parse_row_text(data, tx: Transformer) -> Tuple[Any, ...]: + cdef unsigned char *fstart + cdef Py_ssize_t size + _buffer_as_string_and_size(data, <char **>&fstart, &size) + + # politely assume that the number of fields will be what in the result + cdef int nfields = tx._nfields + cdef list row = PyList_New(nfields) + + cdef unsigned char *fend + cdef unsigned char *rowend = fstart + size + cdef unsigned char *src + cdef unsigned char *tgt + cdef int col + cdef int num_bs + + for col in range(nfields): + fend = fstart + num_bs = 0 + # Scan to the end of the field, remember if you see any backslash + while fend[0] != b'\t' and fend[0] != b'\n' and fend < rowend: + if fend[0] == b'\\': + num_bs += 1 + # skip the next char to avoid counting 
escaped backslashes twice + fend += 1 + fend += 1 + + # Check if we stopped for the right reason + if fend >= rowend: + raise e.DataError("bad copy data: field delimiter not found") + elif fend[0] == b'\t' and col == nfields - 1: + raise e.DataError("bad copy data: got a tab at the end of the row") + elif fend[0] == b'\n' and col != nfields - 1: + raise e.DataError( + "bad copy format: got a newline before the end of the row") + + # Is this a NULL? + if fend - fstart == 2 and fstart[0] == b'\\' and fstart[1] == b'N': + field = None + + # Is this a field with no backslash? + elif num_bs == 0: + # Nothing to unescape: we don't need a copy + field = PyMemoryView_FromObject( + ViewBuffer._from_buffer(data, fstart, fend - fstart)) + + # This is a field containing backslashes + else: + # We need a copy of the buffer to unescape + field = PyByteArray_FromStringAndSize("", 0) + PyByteArray_Resize(field, fend - fstart - num_bs) + tgt = <unsigned char *>PyByteArray_AS_STRING(field) + src = fstart + while (src < fend): + if src[0] != b'\\': + tgt[0] = src[0] + else: + src += 1 + tgt[0] = copy_unescape_lut[src[0]] + src += 1 + tgt += 1 + + Py_INCREF(field) + PyList_SET_ITEM(row, col, field) + + # Start of the field + fstart = fend + 1 + + # Convert the array of buffers into Python objects + return tx.load_sequence(row) + + +cdef extern from *: + """ +/* handle chars to (un)escape in text copy representation */ +/* '\b', '\t', '\n', '\v', '\f', '\r', '\\' */ + +/* Escaping chars */ +static const char copy_escape_lut[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 98, 116, 110, 118, 102, 114, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Conversion of escaped to unescaped chars */ +static const char copy_unescape_lut[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 8, 99, 100, 101, 12, 103, 104, 105, 106, 107, 108, 109, 10, 111, +112, 113, 13, 115, 9, 117, 11, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 
241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; + """ + const char[256] copy_escape_lut + const char[256] copy_unescape_lut diff --git a/psycopg_c/psycopg_c/_psycopg/endian.pxd b/psycopg_c/psycopg_c/_psycopg/endian.pxd new file mode 100644 index 0000000..44e7305 --- /dev/null +++ b/psycopg_c/psycopg_c/_psycopg/endian.pxd @@ -0,0 +1,155 @@ +""" +Access to endian conversion function +""" + +# Copyright (C) 2020 The Psycopg Team + +from libc.stdint cimport uint16_t, uint32_t, uint64_t + +cdef extern from * nogil: + # from https://gist.github.com/panzi/6856583 + # Improved in: + # https://github.com/linux-sunxi/sunxi-tools/blob/master/include/portable_endian.h + """ +// "License": Public Domain +// I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. +// In case there are jurisdictions that don't support putting things in the public domain you can also consider it to +// be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it +// an example on how to get the endian conversion functions on different platforms. + +#ifndef PORTABLE_ENDIAN_H__ +#define PORTABLE_ENDIAN_H__ + +#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) + +# define __WINDOWS__ + +#endif + +#if defined(__linux__) || defined(__CYGWIN__) + +# include <endian.h> + +#elif defined(__APPLE__) + +# include <libkern/OSByteOrder.h> + +# define htobe16(x) OSSwapHostToBigInt16(x) +# define htole16(x) OSSwapHostToLittleInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define htole32(x) OSSwapHostToLittleInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe64(x) OSSwapHostToBigInt64(x) +# define htole64(x) OSSwapHostToLittleInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include <sys/endian.h> + +/* For functions still missing, try to substitute 'historic' OpenBSD names */ +#ifndef be16toh +# define be16toh(x) betoh16(x) +#endif +#ifndef le16toh +# define le16toh(x) letoh16(x) +#endif +#ifndef be32toh +# define be32toh(x) betoh32(x) +#endif +#ifndef le32toh +# define le32toh(x) letoh32(x) +#endif +#ifndef be64toh +# define be64toh(x) betoh64(x) +#endif +#ifndef le64toh +# define le64toh(x) letoh64(x) +#endif + +#elif defined(__WINDOWS__) + +# include <winsock2.h> +# ifndef _MSC_VER +# include <sys/param.h> +# endif + +# if BYTE_ORDER == LITTLE_ENDIAN + +# define htobe16(x) htons(x) +# define htole16(x) (x) +# define be16toh(x) ntohs(x) +# define le16toh(x) (x) + +# define htobe32(x) htonl(x) +# define htole32(x) (x) +# define be32toh(x) ntohl(x) +# define le32toh(x) (x) + +# define htobe64(x) htonll(x) +# define htole64(x) (x) +# define be64toh(x) ntohll(x) +# define le64toh(x) (x) + +# elif BYTE_ORDER == BIG_ENDIAN + + /* that would be xbox 360 */ +# define htobe16(x) (x) +# define htole16(x) __builtin_bswap16(x) +# define be16toh(x) (x) +# define le16toh(x) __builtin_bswap16(x) + +# define htobe32(x) (x) +# define htole32(x) __builtin_bswap32(x) +# define be32toh(x) (x) +# define 
le32toh(x) __builtin_bswap32(x) + +# define htobe64(x) (x) +# define htole64(x) __builtin_bswap64(x) +# define be64toh(x) (x) +# define le64toh(x) __builtin_bswap64(x) + +# else + +# error byte order not supported + +# endif + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#else + +# error platform not supported + +#endif + +#endif + """ + cdef uint16_t htobe16(uint16_t host_16bits) + cdef uint16_t htole16(uint16_t host_16bits) + cdef uint16_t be16toh(uint16_t big_endian_16bits) + cdef uint16_t le16toh(uint16_t little_endian_16bits) + + cdef uint32_t htobe32(uint32_t host_32bits) + cdef uint32_t htole32(uint32_t host_32bits) + cdef uint32_t be32toh(uint32_t big_endian_32bits) + cdef uint32_t le32toh(uint32_t little_endian_32bits) + + cdef uint64_t htobe64(uint64_t host_64bits) + cdef uint64_t htole64(uint64_t host_64bits) + cdef uint64_t be64toh(uint64_t big_endian_64bits) + cdef uint64_t le64toh(uint64_t little_endian_64bits) diff --git a/psycopg_c/psycopg_c/_psycopg/generators.pyx b/psycopg_c/psycopg_c/_psycopg/generators.pyx new file mode 100644 index 0000000..9ce9e54 --- /dev/null +++ b/psycopg_c/psycopg_c/_psycopg/generators.pyx @@ -0,0 +1,276 @@ +""" +C implementation of generators for the communication protocols with the libpq +""" + +# Copyright (C) 2020 The Psycopg Team + +from cpython.object cimport PyObject_CallFunctionObjArgs + +from typing import List + +from psycopg import errors as e +from psycopg.pq import abc, error_message +from psycopg.abc import PipelineCommand, PQGen +from psycopg._enums import Wait, Ready +from psycopg._compat import Deque +from psycopg._encodings import conninfo_encoding + +cdef object WAIT_W = Wait.W +cdef object WAIT_R = Wait.R +cdef object WAIT_RW = Wait.RW +cdef object PY_READY_R = Ready.R +cdef object PY_READY_W = Ready.W +cdef object PY_READY_RW = Ready.RW +cdef int READY_R = Ready.R +cdef int READY_W = Ready.W +cdef int READY_RW = Ready.RW + +def connect(conninfo: str) -> PQGenConn[abc.PGconn]: + """ + Generator to create a database connection without blocking. + + """ + cdef pq.PGconn conn = pq.PGconn.connect_start(conninfo.encode()) + cdef libpq.PGconn *pgconn_ptr = conn._pgconn_ptr + cdef int conn_status = libpq.PQstatus(pgconn_ptr) + cdef int poll_status + + while True: + if conn_status == libpq.CONNECTION_BAD: + encoding = conninfo_encoding(conninfo) + raise e.OperationalError( + f"connection is bad: {error_message(conn, encoding=encoding)}", + pgconn=conn + ) + + with nogil: + poll_status = libpq.PQconnectPoll(pgconn_ptr) + + if poll_status == libpq.PGRES_POLLING_OK: + break + elif poll_status == libpq.PGRES_POLLING_READING: + yield (libpq.PQsocket(pgconn_ptr), WAIT_R) + elif poll_status == libpq.PGRES_POLLING_WRITING: + yield (libpq.PQsocket(pgconn_ptr), WAIT_W) + elif poll_status == libpq.PGRES_POLLING_FAILED: + encoding = conninfo_encoding(conninfo) + raise e.OperationalError( + f"connection failed: {error_message(conn, encoding=encoding)}", + pgconn=conn + ) + else: + raise e.InternalError( + f"unexpected poll status: {poll_status}", pgconn=conn + ) + + conn.nonblocking = 1 + return conn + + +def execute(pq.PGconn pgconn) -> PQGen[List[abc.PGresult]]: + """ + Generator sending a query and returning results without blocking. + + The query must have already been sent using `pgconn.send_query()` or + similar. Flush the query and then return the result using nonblocking + functions. 
+ + Return the list of results returned by the database (whether success + or error). + """ + yield from send(pgconn) + rv = yield from fetch_many(pgconn) + return rv + + +def send(pq.PGconn pgconn) -> PQGen[None]: + """ + Generator to send a query to the server without blocking. + + The query must have already been sent using `pgconn.send_query()` or + similar. Flush the query and then return the result using nonblocking + functions. + + After this generator has finished you may want to cycle using `fetch()` + to retrieve the results available. + """ + cdef libpq.PGconn *pgconn_ptr = pgconn._pgconn_ptr + cdef int status + cdef int cires + + while True: + if pgconn.flush() == 0: + break + + status = yield WAIT_RW + if status & READY_R: + with nogil: + # This call may read notifies which will be saved in the + # PGconn buffer and passed to Python later. + cires = libpq.PQconsumeInput(pgconn_ptr) + if 1 != cires: + raise e.OperationalError( + f"consuming input failed: {error_message(pgconn)}") + + +def fetch_many(pq.PGconn pgconn) -> PQGen[List[PGresult]]: + """ + Generator retrieving results from the database without blocking. + + The query must have already been sent to the server, so pgconn.flush() has + already returned 0. + + Return the list of results returned by the database (whether success + or error). + """ + cdef list results = [] + cdef int status + cdef pq.PGresult result + cdef libpq.PGresult *pgres + + while True: + result = yield from fetch(pgconn) + if result is None: + break + results.append(result) + pgres = result._pgresult_ptr + + status = libpq.PQresultStatus(pgres) + if ( + status == libpq.PGRES_COPY_IN + or status == libpq.PGRES_COPY_OUT + or status == libpq.PGRES_COPY_BOTH + ): + # After entering copy mode the libpq will create a phony result + # for every request so let's break the endless loop. + break + + if status == libpq.PGRES_PIPELINE_SYNC: + # PIPELINE_SYNC is not followed by a NULL, but we return it alone + # similarly to other result sets. + break + + return results + + +def fetch(pq.PGconn pgconn) -> PQGen[Optional[PGresult]]: + """ + Generator retrieving a single result from the database without blocking. + + The query must have already been sent to the server, so pgconn.flush() has + already returned 0. + + Return a result from the database (whether success or error). + """ + cdef libpq.PGconn *pgconn_ptr = pgconn._pgconn_ptr + cdef int cires, ibres + cdef libpq.PGresult *pgres + + with nogil: + ibres = libpq.PQisBusy(pgconn_ptr) + if ibres: + yield WAIT_R + while True: + with nogil: + cires = libpq.PQconsumeInput(pgconn_ptr) + if cires == 1: + ibres = libpq.PQisBusy(pgconn_ptr) + + if 1 != cires: + raise e.OperationalError( + f"consuming input failed: {error_message(pgconn)}") + if not ibres: + break + yield WAIT_R + + _consume_notifies(pgconn) + + with nogil: + pgres = libpq.PQgetResult(pgconn_ptr) + if pgres is NULL: + return None + return pq.PGresult._from_ptr(pgres) + + +def pipeline_communicate( + pq.PGconn pgconn, commands: Deque[PipelineCommand] +) -> PQGen[List[List[PGresult]]]: + """Generator to send queries from a connection in pipeline mode while also + receiving results. + + Return a list results, including single PIPELINE_SYNC elements. 
+ """ + cdef libpq.PGconn *pgconn_ptr = pgconn._pgconn_ptr + cdef int cires + cdef int status + cdef int ready + cdef libpq.PGresult *pgres + cdef list res = [] + cdef list results = [] + cdef pq.PGresult r + + while True: + ready = yield WAIT_RW + + if ready & READY_R: + with nogil: + cires = libpq.PQconsumeInput(pgconn_ptr) + if 1 != cires: + raise e.OperationalError( + f"consuming input failed: {error_message(pgconn)}") + + _consume_notifies(pgconn) + + res: List[PGresult] = [] + while True: + with nogil: + ibres = libpq.PQisBusy(pgconn_ptr) + if ibres: + break + pgres = libpq.PQgetResult(pgconn_ptr) + + if pgres is NULL: + if not res: + break + results.append(res) + res = [] + else: + status = libpq.PQresultStatus(pgres) + r = pq.PGresult._from_ptr(pgres) + if status == libpq.PGRES_PIPELINE_SYNC: + results.append([r]) + break + else: + res.append(r) + + if ready & READY_W: + pgconn.flush() + if not commands: + break + commands.popleft()() + + return results + + +cdef int _consume_notifies(pq.PGconn pgconn) except -1: + cdef object notify_handler = pgconn.notify_handler + cdef libpq.PGconn *pgconn_ptr + cdef libpq.PGnotify *notify + + if notify_handler is not None: + while True: + pynotify = pgconn.notifies() + if pynotify is None: + break + PyObject_CallFunctionObjArgs( + notify_handler, <PyObject *>pynotify, NULL + ) + else: + pgconn_ptr = pgconn._pgconn_ptr + while True: + notify = libpq.PQnotifies(pgconn_ptr) + if notify is NULL: + break + libpq.PQfreemem(notify) + + return 0 diff --git a/psycopg_c/psycopg_c/_psycopg/oids.pxd b/psycopg_c/psycopg_c/_psycopg/oids.pxd new file mode 100644 index 0000000..2a864c4 --- /dev/null +++ b/psycopg_c/psycopg_c/_psycopg/oids.pxd @@ -0,0 +1,92 @@ +""" +Constants to refer to OIDS in C +""" + +# Copyright (C) 2020 The Psycopg Team + +# Use tools/update_oids.py to update this data. 
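The OID constants in the enum below are frozen at build time; at run time the same numbers are reachable through psycopg's types registry. A quick cross-check — a sketch assuming a standard psycopg 3 install, where the registry supports lookup both by name and by OID::

    import psycopg

    t = psycopg.postgres.types["numeric"]
    print(t.oid)                            # 1700, i.e. NUMERIC_OID below
    print(psycopg.postgres.types[25].name)  # "text", i.e. TEXT_OID below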
+
+cdef enum:
+    INVALID_OID = 0
+
+    # autogenerated: start
+
+    # Generated from PostgreSQL 15.0
+
+    ACLITEM_OID = 1033
+    BIT_OID = 1560
+    BOOL_OID = 16
+    BOX_OID = 603
+    BPCHAR_OID = 1042
+    BYTEA_OID = 17
+    CHAR_OID = 18
+    CID_OID = 29
+    CIDR_OID = 650
+    CIRCLE_OID = 718
+    DATE_OID = 1082
+    DATEMULTIRANGE_OID = 4535
+    DATERANGE_OID = 3912
+    FLOAT4_OID = 700
+    FLOAT8_OID = 701
+    GTSVECTOR_OID = 3642
+    INET_OID = 869
+    INT2_OID = 21
+    INT2VECTOR_OID = 22
+    INT4_OID = 23
+    INT4MULTIRANGE_OID = 4451
+    INT4RANGE_OID = 3904
+    INT8_OID = 20
+    INT8MULTIRANGE_OID = 4536
+    INT8RANGE_OID = 3926
+    INTERVAL_OID = 1186
+    JSON_OID = 114
+    JSONB_OID = 3802
+    JSONPATH_OID = 4072
+    LINE_OID = 628
+    LSEG_OID = 601
+    MACADDR_OID = 829
+    MACADDR8_OID = 774
+    MONEY_OID = 790
+    NAME_OID = 19
+    NUMERIC_OID = 1700
+    NUMMULTIRANGE_OID = 4532
+    NUMRANGE_OID = 3906
+    OID_OID = 26
+    OIDVECTOR_OID = 30
+    PATH_OID = 602
+    PG_LSN_OID = 3220
+    POINT_OID = 600
+    POLYGON_OID = 604
+    RECORD_OID = 2249
+    REFCURSOR_OID = 1790
+    REGCLASS_OID = 2205
+    REGCOLLATION_OID = 4191
+    REGCONFIG_OID = 3734
+    REGDICTIONARY_OID = 3769
+    REGNAMESPACE_OID = 4089
+    REGOPER_OID = 2203
+    REGOPERATOR_OID = 2204
+    REGPROC_OID = 24
+    REGPROCEDURE_OID = 2202
+    REGROLE_OID = 4096
+    REGTYPE_OID = 2206
+    TEXT_OID = 25
+    TID_OID = 27
+    TIME_OID = 1083
+    TIMESTAMP_OID = 1114
+    TIMESTAMPTZ_OID = 1184
+    TIMETZ_OID = 1266
+    TSMULTIRANGE_OID = 4533
+    TSQUERY_OID = 3615
+    TSRANGE_OID = 3908
+    TSTZMULTIRANGE_OID = 4534
+    TSTZRANGE_OID = 3910
+    TSVECTOR_OID = 3614
+    TXID_SNAPSHOT_OID = 2970
+    UUID_OID = 2950
+    VARBIT_OID = 1562
+    VARCHAR_OID = 1043
+    XID_OID = 28
+    XID8_OID = 5069
+    XML_OID = 142
+    # autogenerated: end
diff --git a/psycopg_c/psycopg_c/_psycopg/transform.pyx b/psycopg_c/psycopg_c/_psycopg/transform.pyx
new file mode 100644
index 0000000..fc69725
--- /dev/null
+++ b/psycopg_c/psycopg_c/_psycopg/transform.pyx
@@ -0,0 +1,640 @@
+"""
+Helper object to transform values between Python and PostgreSQL
+
+Cython implementation: it can access lower-level C features without creating
+too many temporary Python objects and with less memory copying.
+
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+cimport cython
+from cpython.ref cimport Py_INCREF, Py_DECREF
+from cpython.set cimport PySet_Add, PySet_Contains
+from cpython.dict cimport PyDict_GetItem, PyDict_SetItem
+from cpython.list cimport (
+    PyList_New, PyList_CheckExact,
+    PyList_GET_ITEM, PyList_SET_ITEM, PyList_GET_SIZE)
+from cpython.bytes cimport PyBytes_AS_STRING
+from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM
+from cpython.object cimport PyObject, PyObject_CallFunctionObjArgs
+
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
+from psycopg import errors as e
+from psycopg.pq import Format as PqFormat
+from psycopg.rows import Row, RowMaker
+from psycopg._encodings import pgconn_encoding
+
+NoneType = type(None)
+
+# internal structure: you are not supposed to know this. But it's worth some
+# 10% of the innermost loop, so I'm willing to ask for forgiveness later...
+ +ctypedef struct PGresAttValue: + int len + char *value + +ctypedef struct pg_result_int: + # NOTE: it would be advised that we don't know this structure's content + int ntups + int numAttributes + libpq.PGresAttDesc *attDescs + PGresAttValue **tuples + # ...more members, which we ignore + + +@cython.freelist(16) +cdef class RowLoader: + cdef CLoader cloader + cdef object pyloader + cdef object loadfunc + + +@cython.freelist(16) +cdef class RowDumper: + cdef CDumper cdumper + cdef object pydumper + cdef object dumpfunc + cdef object oid + cdef object format + + +cdef class Transformer: + """ + An object that can adapt efficiently between Python and PostgreSQL. + + The life cycle of the object is the query, so it is assumed that attributes + such as the server version or the connection encoding will not change. The + object have its state so adapting several values of the same type can be + optimised. + + """ + + cdef readonly object connection + cdef readonly object adapters + cdef readonly object types + cdef readonly object formats + cdef str _encoding + cdef int _none_oid + + # mapping class -> Dumper instance (auto, text, binary) + cdef dict _auto_dumpers + cdef dict _text_dumpers + cdef dict _binary_dumpers + + # mapping oid -> Loader instance (text, binary) + cdef dict _text_loaders + cdef dict _binary_loaders + + # mapping oid -> Dumper instance (text, binary) + cdef dict _oid_text_dumpers + cdef dict _oid_binary_dumpers + + cdef pq.PGresult _pgresult + cdef int _nfields, _ntuples + cdef list _row_dumpers + cdef list _row_loaders + + cdef dict _oid_types + + def __cinit__(self, context: Optional["AdaptContext"] = None): + if context is not None: + self.adapters = context.adapters + self.connection = context.connection + else: + from psycopg import postgres + self.adapters = postgres.adapters + self.connection = None + + self.types = self.formats = None + self._none_oid = -1 + + @classmethod + def from_context(cls, context: Optional["AdaptContext"]): + """ + Return a Transformer from an AdaptContext. + + If the context is a Transformer instance, just return it. 
+ """ + return _tx_from_context(context) + + @property + def encoding(self) -> str: + if not self._encoding: + conn = self.connection + self._encoding = pgconn_encoding(conn.pgconn) if conn else "utf-8" + return self._encoding + + @property + def pgresult(self) -> Optional[PGresult]: + return self._pgresult + + cpdef set_pgresult( + self, + pq.PGresult result, + object set_loaders = True, + object format = None + ): + self._pgresult = result + + if result is None: + self._nfields = self._ntuples = 0 + if set_loaders: + self._row_loaders = [] + return + + cdef libpq.PGresult *res = self._pgresult._pgresult_ptr + self._nfields = libpq.PQnfields(res) + self._ntuples = libpq.PQntuples(res) + + if not set_loaders: + return + + if not self._nfields: + self._row_loaders = [] + return + + if format is None: + format = libpq.PQfformat(res, 0) + + cdef list loaders = PyList_New(self._nfields) + cdef PyObject *row_loader + cdef object oid + + cdef int i + for i in range(self._nfields): + oid = libpq.PQftype(res, i) + row_loader = self._c_get_loader(<PyObject *>oid, <PyObject *>format) + Py_INCREF(<object>row_loader) + PyList_SET_ITEM(loaders, i, <object>row_loader) + + self._row_loaders = loaders + + def set_dumper_types(self, types: Sequence[int], format: Format) -> None: + cdef Py_ssize_t ntypes = len(types) + dumpers = PyList_New(ntypes) + cdef int i + for i in range(ntypes): + oid = types[i] + dumper_ptr = self.get_dumper_by_oid( + <PyObject *>oid, <PyObject *>format) + Py_INCREF(<object>dumper_ptr) + PyList_SET_ITEM(dumpers, i, <object>dumper_ptr) + + self._row_dumpers = dumpers + self.types = tuple(types) + self.formats = [format] * ntypes + + def set_loader_types(self, types: Sequence[int], format: Format) -> None: + self._c_loader_types(len(types), types, format) + + cdef void _c_loader_types(self, Py_ssize_t ntypes, list types, object format): + cdef list loaders = PyList_New(ntypes) + + # these are used more as Python object than C + cdef PyObject *oid + cdef PyObject *row_loader + for i in range(ntypes): + oid = PyList_GET_ITEM(types, i) + row_loader = self._c_get_loader(oid, <PyObject *>format) + Py_INCREF(<object>row_loader) + PyList_SET_ITEM(loaders, i, <object>row_loader) + + self._row_loaders = loaders + + cpdef as_literal(self, obj): + cdef PyObject *row_dumper = self.get_row_dumper( + <PyObject *>obj, <PyObject *>PG_TEXT) + + if (<RowDumper>row_dumper).cdumper is not None: + dumper = (<RowDumper>row_dumper).cdumper + else: + dumper = (<RowDumper>row_dumper).pydumper + + rv = dumper.quote(obj) + oid = dumper.oid + # If the result is quoted and the oid not unknown or text, + # add an explicit type cast. + # Check the last char because the first one might be 'E'. 
+ if oid and oid != oids.TEXT_OID and rv and rv[-1] == 39: + if self._oid_types is None: + self._oid_types = {} + type_ptr = PyDict_GetItem(<object>self._oid_types, oid) + if type_ptr == NULL: + type_sql = b"" + ti = self.adapters.types.get(oid) + if ti is not None: + if oid < 8192: + # builtin: prefer "timestamptz" to "timestamp with time zone" + type_sql = ti.name.encode(self.encoding) + else: + type_sql = ti.regtype.encode(self.encoding) + if oid == ti.array_oid: + type_sql += b"[]" + + type_ptr = <PyObject *>type_sql + PyDict_SetItem(<object>self._oid_types, oid, type_sql) + + if <object>type_ptr: + rv = b"%s::%s" % (rv, <object>type_ptr) + + return rv + + def get_dumper(self, obj, format) -> "Dumper": + cdef PyObject *row_dumper = self.get_row_dumper( + <PyObject *>obj, <PyObject *>format) + return (<RowDumper>row_dumper).pydumper + + cdef PyObject *get_row_dumper(self, PyObject *obj, PyObject *fmt) except NULL: + """ + Return a borrowed reference to the RowDumper for the given obj/fmt. + """ + # Fast path: return a Dumper class already instantiated from the same type + cdef PyObject *cache + cdef PyObject *ptr + cdef PyObject *ptr1 + cdef RowDumper row_dumper + + # Normally, the type of the object dictates how to dump it + key = type(<object>obj) + + # Establish where would the dumper be cached + bfmt = PyUnicode_AsUTF8String(<object>fmt) + cdef char cfmt = PyBytes_AS_STRING(bfmt)[0] + if cfmt == b's': + if self._auto_dumpers is None: + self._auto_dumpers = {} + cache = <PyObject *>self._auto_dumpers + elif cfmt == b'b': + if self._binary_dumpers is None: + self._binary_dumpers = {} + cache = <PyObject *>self._binary_dumpers + elif cfmt == b't': + if self._text_dumpers is None: + self._text_dumpers = {} + cache = <PyObject *>self._text_dumpers + else: + raise ValueError( + f"format should be a psycopg.adapt.Format, not {<object>fmt}") + + # Reuse an existing Dumper class for objects of the same type + ptr = PyDict_GetItem(<object>cache, key) + if ptr == NULL: + dcls = PyObject_CallFunctionObjArgs( + self.adapters.get_dumper, <PyObject *>key, fmt, NULL) + dumper = PyObject_CallFunctionObjArgs( + dcls, <PyObject *>key, <PyObject *>self, NULL) + + row_dumper = _as_row_dumper(dumper) + PyDict_SetItem(<object>cache, key, row_dumper) + ptr = <PyObject *>row_dumper + + # Check if the dumper requires an upgrade to handle this specific value + if (<RowDumper>ptr).cdumper is not None: + key1 = (<RowDumper>ptr).cdumper.get_key(<object>obj, <object>fmt) + else: + key1 = PyObject_CallFunctionObjArgs( + (<RowDumper>ptr).pydumper.get_key, obj, fmt, NULL) + if key1 is key: + return ptr + + # If it does, ask the dumper to create its own upgraded version + ptr1 = PyDict_GetItem(<object>cache, key1) + if ptr1 != NULL: + return ptr1 + + if (<RowDumper>ptr).cdumper is not None: + dumper = (<RowDumper>ptr).cdumper.upgrade(<object>obj, <object>fmt) + else: + dumper = PyObject_CallFunctionObjArgs( + (<RowDumper>ptr).pydumper.upgrade, obj, fmt, NULL) + + row_dumper = _as_row_dumper(dumper) + PyDict_SetItem(<object>cache, key1, row_dumper) + return <PyObject *>row_dumper + + cdef PyObject *get_dumper_by_oid(self, PyObject *oid, PyObject *fmt) except NULL: + """ + Return a borrowed reference to the RowDumper for the given oid/fmt. 
+ """ + cdef PyObject *ptr + cdef PyObject *cache + cdef RowDumper row_dumper + + # Establish where would the dumper be cached + cdef int cfmt = <object>fmt + if cfmt == 0: + if self._oid_text_dumpers is None: + self._oid_text_dumpers = {} + cache = <PyObject *>self._oid_text_dumpers + elif cfmt == 1: + if self._oid_binary_dumpers is None: + self._oid_binary_dumpers = {} + cache = <PyObject *>self._oid_binary_dumpers + else: + raise ValueError( + f"format should be a psycopg.pq.Format, not {<object>fmt}") + + # Reuse an existing Dumper class for objects of the same type + ptr = PyDict_GetItem(<object>cache, <object>oid) + if ptr == NULL: + dcls = PyObject_CallFunctionObjArgs( + self.adapters.get_dumper_by_oid, oid, fmt, NULL) + dumper = PyObject_CallFunctionObjArgs( + dcls, <PyObject *>NoneType, <PyObject *>self, NULL) + + row_dumper = _as_row_dumper(dumper) + PyDict_SetItem(<object>cache, <object>oid, row_dumper) + ptr = <PyObject *>row_dumper + + return ptr + + cpdef dump_sequence(self, object params, object formats): + # Verify that they are not none and that PyList_GET_ITEM won't blow up + cdef Py_ssize_t nparams = len(params) + cdef list out = PyList_New(nparams) + + cdef int i + cdef PyObject *dumper_ptr # borrowed pointer to row dumper + cdef object dumped + cdef Py_ssize_t size + + if self._none_oid < 0: + self._none_oid = self.adapters.get_dumper(NoneType, "s").oid + + dumpers = self._row_dumpers + + if dumpers: + for i in range(nparams): + param = params[i] + if param is not None: + dumper_ptr = PyList_GET_ITEM(dumpers, i) + if (<RowDumper>dumper_ptr).cdumper is not None: + dumped = PyByteArray_FromStringAndSize("", 0) + size = (<RowDumper>dumper_ptr).cdumper.cdump( + param, <bytearray>dumped, 0) + PyByteArray_Resize(dumped, size) + else: + dumped = PyObject_CallFunctionObjArgs( + (<RowDumper>dumper_ptr).dumpfunc, + <PyObject *>param, NULL) + else: + dumped = None + + Py_INCREF(dumped) + PyList_SET_ITEM(out, i, dumped) + + return out + + cdef tuple types = PyTuple_New(nparams) + cdef list pqformats = PyList_New(nparams) + + for i in range(nparams): + param = params[i] + if param is not None: + dumper_ptr = self.get_row_dumper( + <PyObject *>param, <PyObject *>formats[i]) + if (<RowDumper>dumper_ptr).cdumper is not None: + dumped = PyByteArray_FromStringAndSize("", 0) + size = (<RowDumper>dumper_ptr).cdumper.cdump( + param, <bytearray>dumped, 0) + PyByteArray_Resize(dumped, size) + else: + dumped = PyObject_CallFunctionObjArgs( + (<RowDumper>dumper_ptr).dumpfunc, + <PyObject *>param, NULL) + oid = (<RowDumper>dumper_ptr).oid + fmt = (<RowDumper>dumper_ptr).format + else: + dumped = None + oid = self._none_oid + fmt = PQ_TEXT + + Py_INCREF(dumped) + PyList_SET_ITEM(out, i, dumped) + + Py_INCREF(oid) + PyTuple_SET_ITEM(types, i, oid) + + Py_INCREF(fmt) + PyList_SET_ITEM(pqformats, i, fmt) + + self.types = types + self.formats = pqformats + return out + + def load_rows(self, int row0, int row1, object make_row) -> List[Row]: + if self._pgresult is None: + raise e.InterfaceError("result not set") + + if not (0 <= row0 <= self._ntuples and 0 <= row1 <= self._ntuples): + raise e.InterfaceError( + f"rows must be included between 0 and {self._ntuples}" + ) + + cdef libpq.PGresult *res = self._pgresult._pgresult_ptr + # cheeky access to the internal PGresult structure + cdef pg_result_int *ires = <pg_result_int*>res + + cdef int row + cdef int col + cdef PGresAttValue *attval + cdef object record # not 'tuple' as it would check on assignment + + cdef object records = PyList_New(row1 - 
row0) + for row in range(row0, row1): + record = PyTuple_New(self._nfields) + Py_INCREF(record) + PyList_SET_ITEM(records, row - row0, record) + + cdef PyObject *loader # borrowed RowLoader + cdef PyObject *brecord # borrowed + row_loaders = self._row_loaders # avoid an incref/decref per item + + for col in range(self._nfields): + loader = PyList_GET_ITEM(row_loaders, col) + if (<RowLoader>loader).cloader is not None: + for row in range(row0, row1): + brecord = PyList_GET_ITEM(records, row - row0) + attval = &(ires.tuples[row][col]) + if attval.len == -1: # NULL_LEN + pyval = None + else: + pyval = (<RowLoader>loader).cloader.cload( + attval.value, attval.len) + + Py_INCREF(pyval) + PyTuple_SET_ITEM(<object>brecord, col, pyval) + + else: + for row in range(row0, row1): + brecord = PyList_GET_ITEM(records, row - row0) + attval = &(ires.tuples[row][col]) + if attval.len == -1: # NULL_LEN + pyval = None + else: + b = PyMemoryView_FromObject( + ViewBuffer._from_buffer( + self._pgresult, + <unsigned char *>attval.value, attval.len)) + pyval = PyObject_CallFunctionObjArgs( + (<RowLoader>loader).loadfunc, <PyObject *>b, NULL) + + Py_INCREF(pyval) + PyTuple_SET_ITEM(<object>brecord, col, pyval) + + if make_row is not tuple: + for i in range(row1 - row0): + brecord = PyList_GET_ITEM(records, i) + record = PyObject_CallFunctionObjArgs( + make_row, <PyObject *>brecord, NULL) + Py_INCREF(record) + PyList_SET_ITEM(records, i, record) + Py_DECREF(<object>brecord) + return records + + def load_row(self, int row, object make_row) -> Optional[Row]: + if self._pgresult is None: + return None + + if not 0 <= row < self._ntuples: + return None + + cdef libpq.PGresult *res = self._pgresult._pgresult_ptr + # cheeky access to the internal PGresult structure + cdef pg_result_int *ires = <pg_result_int*>res + + cdef PyObject *loader # borrowed RowLoader + cdef int col + cdef PGresAttValue *attval + cdef object record # not 'tuple' as it would check on assignment + + record = PyTuple_New(self._nfields) + row_loaders = self._row_loaders # avoid an incref/decref per item + + for col in range(self._nfields): + attval = &(ires.tuples[row][col]) + if attval.len == -1: # NULL_LEN + pyval = None + else: + loader = PyList_GET_ITEM(row_loaders, col) + if (<RowLoader>loader).cloader is not None: + pyval = (<RowLoader>loader).cloader.cload( + attval.value, attval.len) + else: + b = PyMemoryView_FromObject( + ViewBuffer._from_buffer( + self._pgresult, + <unsigned char *>attval.value, attval.len)) + pyval = PyObject_CallFunctionObjArgs( + (<RowLoader>loader).loadfunc, <PyObject *>b, NULL) + + Py_INCREF(pyval) + PyTuple_SET_ITEM(record, col, pyval) + + if make_row is not tuple: + record = PyObject_CallFunctionObjArgs( + make_row, <PyObject *>record, NULL) + return record + + cpdef object load_sequence(self, record: Sequence[Optional[Buffer]]): + cdef Py_ssize_t nfields = len(record) + out = PyTuple_New(nfields) + cdef PyObject *loader # borrowed RowLoader + cdef int col + cdef char *ptr + cdef Py_ssize_t size + + row_loaders = self._row_loaders # avoid an incref/decref per item + if PyList_GET_SIZE(row_loaders) != nfields: + raise e.ProgrammingError( + f"cannot load sequence of {nfields} items:" + f" {len(self._row_loaders)} loaders registered") + + for col in range(nfields): + item = record[col] + if item is None: + Py_INCREF(None) + PyTuple_SET_ITEM(out, col, None) + continue + + loader = PyList_GET_ITEM(row_loaders, col) + if (<RowLoader>loader).cloader is not None: + _buffer_as_string_and_size(item, &ptr, &size) + pyval = 
(<RowLoader>loader).cloader.cload(ptr, size) + else: + pyval = PyObject_CallFunctionObjArgs( + (<RowLoader>loader).loadfunc, <PyObject *>item, NULL) + + Py_INCREF(pyval) + PyTuple_SET_ITEM(out, col, pyval) + + return out + + def get_loader(self, oid: int, format: pq.Format) -> "Loader": + cdef PyObject *row_loader = self._c_get_loader( + <PyObject *>oid, <PyObject *>format) + return (<RowLoader>row_loader).pyloader + + cdef PyObject *_c_get_loader(self, PyObject *oid, PyObject *fmt) except NULL: + """ + Return a borrowed reference to the RowLoader instance for given oid/fmt + """ + cdef PyObject *ptr + cdef PyObject *cache + + if <object>fmt == PQ_TEXT: + if self._text_loaders is None: + self._text_loaders = {} + cache = <PyObject *>self._text_loaders + elif <object>fmt == PQ_BINARY: + if self._binary_loaders is None: + self._binary_loaders = {} + cache = <PyObject *>self._binary_loaders + else: + raise ValueError( + f"format should be a psycopg.pq.Format, not {format}") + + ptr = PyDict_GetItem(<object>cache, <object>oid) + if ptr != NULL: + return ptr + + loader_cls = self.adapters.get_loader(<object>oid, <object>fmt) + if loader_cls is None: + loader_cls = self.adapters.get_loader(oids.INVALID_OID, <object>fmt) + if loader_cls is None: + raise e.InterfaceError("unknown oid loader not found") + + loader = PyObject_CallFunctionObjArgs( + loader_cls, oid, <PyObject *>self, NULL) + + cdef RowLoader row_loader = RowLoader() + row_loader.pyloader = loader + row_loader.loadfunc = loader.load + if isinstance(loader, CLoader): + row_loader.cloader = <CLoader>loader + + PyDict_SetItem(<object>cache, <object>oid, row_loader) + return <PyObject *>row_loader + + +cdef object _as_row_dumper(object dumper): + cdef RowDumper row_dumper = RowDumper() + + row_dumper.pydumper = dumper + row_dumper.dumpfunc = dumper.dump + row_dumper.oid = dumper.oid + row_dumper.format = dumper.format + + if isinstance(dumper, CDumper): + row_dumper.cdumper = <CDumper>dumper + + return row_dumper + + +cdef Transformer _tx_from_context(object context): + if isinstance(context, Transformer): + return context + else: + return Transformer(context) diff --git a/psycopg_c/psycopg_c/_psycopg/waiting.pyx b/psycopg_c/psycopg_c/_psycopg/waiting.pyx new file mode 100644 index 0000000..0af6c57 --- /dev/null +++ b/psycopg_c/psycopg_c/_psycopg/waiting.pyx @@ -0,0 +1,197 @@ +""" +C implementation of waiting functions +""" + +# Copyright (C) 2022 The Psycopg Team + +from cpython.object cimport PyObject_CallFunctionObjArgs + +cdef extern from *: + """ +#if defined(HAVE_POLL) && !defined(HAVE_BROKEN_POLL) + +#if defined(HAVE_POLL_H) +#include <poll.h> +#elif defined(HAVE_SYS_POLL_H) +#include <sys/poll.h> +#endif + +#else /* no poll available */ + +#ifdef MS_WINDOWS +#include <winsock2.h> +#else +#include <sys/select.h> +#endif + +#endif /* HAVE_POLL */ + +#define SELECT_EV_READ 1 +#define SELECT_EV_WRITE 2 + +#define SEC_TO_MS 1000 +#define SEC_TO_US (1000 * 1000) + +/* Use select to wait for readiness on fileno. + * + * - Return SELECT_EV_* if the file is ready + * - Return 0 on timeout + * - Return -1 (and set an exception) on error. 
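+ *
+ * 'wait' is a bitmask of SELECT_EV_READ (1) and SELECT_EV_WRITE (2): e.g.
+ * wait == (SELECT_EV_READ | SELECT_EV_WRITE) waits until the fd is either
+ * readable or writable, and the return value uses the same encoding.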
+ *
+ * The wisdom of this function comes from:
+ *
+ * - PostgreSQL libpq (see src/interfaces/libpq/fe-misc.c)
+ * - Python select module (see Modules/selectmodule.c)
+ */
+static int
+wait_c_impl(int fileno, int wait, float timeout)
+{
+    int select_rv;
+    int rv = 0;
+
+#if defined(HAVE_POLL) && !defined(HAVE_BROKEN_POLL)
+
+    struct pollfd input_fd;
+    int timeout_ms;
+
+    input_fd.fd = fileno;
+    input_fd.events = POLLERR;
+    input_fd.revents = 0;
+
+    if (wait & SELECT_EV_READ) { input_fd.events |= POLLIN; }
+    if (wait & SELECT_EV_WRITE) { input_fd.events |= POLLOUT; }
+
+    if (timeout < 0.0) {
+        timeout_ms = -1;
+    } else {
+        timeout_ms = (int)(timeout * SEC_TO_MS);
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    errno = 0;
+    select_rv = poll(&input_fd, 1, timeout_ms);
+    Py_END_ALLOW_THREADS
+
+    if (PyErr_CheckSignals()) { goto finally; }
+
+    if (select_rv < 0) {
+        goto error;
+    }
+
+    /* check the events reported by poll(), not the ones requested */
+    if (input_fd.revents & POLLIN) { rv |= SELECT_EV_READ; }
+    if (input_fd.revents & POLLOUT) { rv |= SELECT_EV_WRITE; }
+
+#else
+
+    fd_set ifds;
+    fd_set ofds;
+    fd_set efds;
+    struct timeval tv, *tvptr;
+
+#ifndef MS_WINDOWS
+    if (fileno >= 1024) {
+        PyErr_SetString(
+            PyExc_ValueError,  /* same exception as Python's 'select.select()' */
+            "connection file descriptor out of range for 'select()'");
+        return -1;
+    }
+#endif
+
+    FD_ZERO(&ifds);
+    FD_ZERO(&ofds);
+    FD_ZERO(&efds);
+
+    if (wait & SELECT_EV_READ) { FD_SET(fileno, &ifds); }
+    if (wait & SELECT_EV_WRITE) { FD_SET(fileno, &ofds); }
+    FD_SET(fileno, &efds);
+
+    /* Compute appropriate timeout interval */
+    if (timeout < 0.0) {
+        tvptr = NULL;
+    }
+    else {
+        tv.tv_sec = (int)timeout;
+        /* keep the fractional second: cast the product, not the timeout */
+        tv.tv_usec = (int)(((long)(timeout * SEC_TO_US)) % SEC_TO_US);
+        tvptr = &tv;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    errno = 0;
+    select_rv = select(fileno + 1, &ifds, &ofds, &efds, tvptr);
+    Py_END_ALLOW_THREADS
+
+    if (PyErr_CheckSignals()) { goto finally; }
+
+    if (select_rv < 0) {
+        goto error;
+    }
+
+    if (FD_ISSET(fileno, &ifds)) { rv |= SELECT_EV_READ; }
+    if (FD_ISSET(fileno, &ofds)) { rv |= SELECT_EV_WRITE; }
+
+#endif  /* HAVE_POLL */
+
+    return rv;
+
+error:
+
+#ifdef MS_WINDOWS
+    if (select_rv == SOCKET_ERROR) {
+        PyErr_SetExcFromWindowsErr(PyExc_OSError, WSAGetLastError());
+    }
+#else
+    if (select_rv < 0) {
+        PyErr_SetFromErrno(PyExc_OSError);
+    }
+#endif
+    else {
+        PyErr_SetString(PyExc_OSError, "unexpected error from select()");
+    }
+
+finally:
+
+    return -1;
+
+}
+    """
+    cdef int wait_c_impl(int fileno, int wait, float timeout) except -1
+
+
+def wait_c(gen: PQGen[RV], int fileno, timeout = None) -> RV:
+    """
+    Wait for a generator using poll or select.
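+
+    A None (or negative) timeout is normalized to -1.0, which is passed
+    down as an infinite timeout to poll()/select(); on timeout
+    (ready == 0) the wait is simply retried, so the timeout only bounds a
+    single call, not the whole generator.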
+ """ + cdef float ctimeout + cdef int wait, ready + cdef PyObject *pyready + + if timeout is None: + ctimeout = -1.0 + else: + ctimeout = float(timeout) + if ctimeout < 0.0: + ctimeout = -1.0 + + send = gen.send + + try: + wait = next(gen) + + while True: + ready = wait_c_impl(fileno, wait, ctimeout) + if ready == 0: + continue + elif ready == READY_R: + pyready = <PyObject *>PY_READY_R + elif ready == READY_RW: + pyready = <PyObject *>PY_READY_RW + elif ready == READY_W: + pyready = <PyObject *>PY_READY_W + else: + raise AssertionError(f"unexpected ready value: {ready}") + + wait = PyObject_CallFunctionObjArgs(send, pyready, NULL) + + except StopIteration as ex: + rv: RV = ex.args[0] if ex.args else None + return rv diff --git a/psycopg_c/psycopg_c/pq.pxd b/psycopg_c/psycopg_c/pq.pxd new file mode 100644 index 0000000..57825dd --- /dev/null +++ b/psycopg_c/psycopg_c/pq.pxd @@ -0,0 +1,78 @@ +# Include pid_t but Windows doesn't have it +# Don't use "IF" so that the generated C is portable and can be included +# in the sdist. +cdef extern from * nogil: + """ +#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) + typedef signed pid_t; +#else + #include <fcntl.h> +#endif + """ + ctypedef signed pid_t + +from psycopg_c.pq cimport libpq + +ctypedef char *(*conn_bytes_f) (const libpq.PGconn *) +ctypedef int(*conn_int_f) (const libpq.PGconn *) + + +cdef class PGconn: + cdef libpq.PGconn* _pgconn_ptr + cdef object __weakref__ + cdef public object notice_handler + cdef public object notify_handler + cdef pid_t _procpid + + @staticmethod + cdef PGconn _from_ptr(libpq.PGconn *ptr) + + cpdef int flush(self) except -1 + cpdef object notifies(self) + + +cdef class PGresult: + cdef libpq.PGresult* _pgresult_ptr + + @staticmethod + cdef PGresult _from_ptr(libpq.PGresult *ptr) + + +cdef class PGcancel: + cdef libpq.PGcancel* pgcancel_ptr + + @staticmethod + cdef PGcancel _from_ptr(libpq.PGcancel *ptr) + + +cdef class Escaping: + cdef PGconn conn + + cpdef escape_literal(self, data) + cpdef escape_identifier(self, data) + cpdef escape_string(self, data) + cpdef escape_bytea(self, data) + cpdef unescape_bytea(self, const unsigned char *data) + + +cdef class PQBuffer: + cdef unsigned char *buf + cdef Py_ssize_t len + + @staticmethod + cdef PQBuffer _from_buffer(unsigned char *buf, Py_ssize_t length) + + +cdef class ViewBuffer: + cdef unsigned char *buf + cdef Py_ssize_t len + cdef object obj + + @staticmethod + cdef ViewBuffer _from_buffer( + object obj, unsigned char *buf, Py_ssize_t length) + + +cdef int _buffer_as_string_and_size( + data: "Buffer", char **ptr, Py_ssize_t *length +) except -1 diff --git a/psycopg_c/psycopg_c/pq.pyx b/psycopg_c/psycopg_c/pq.pyx new file mode 100644 index 0000000..d397c17 --- /dev/null +++ b/psycopg_c/psycopg_c/pq.pyx @@ -0,0 +1,38 @@ +""" +libpq Python wrapper using cython bindings. 
+""" + +# Copyright (C) 2020 The Psycopg Team + +from psycopg_c.pq cimport libpq + +import logging + +from psycopg import errors as e +from psycopg.pq import Format +from psycopg.pq.misc import error_message + +logger = logging.getLogger("psycopg") + +__impl__ = 'c' +__build_version__ = libpq.PG_VERSION_NUM + + +def version(): + return libpq.PQlibVersion() + + +include "pq/pgconn.pyx" +include "pq/pgresult.pyx" +include "pq/pgcancel.pyx" +include "pq/conninfo.pyx" +include "pq/escaping.pyx" +include "pq/pqbuffer.pyx" + + +# importing the ssl module sets up Python's libcrypto callbacks +import ssl # noqa + +# disable libcrypto setup in libpq, so it won't stomp on the callbacks +# that have already been set up +libpq.PQinitOpenSSL(1, 0) diff --git a/psycopg_c/psycopg_c/pq/__init__.pxd b/psycopg_c/psycopg_c/pq/__init__.pxd new file mode 100644 index 0000000..ce8c60c --- /dev/null +++ b/psycopg_c/psycopg_c/pq/__init__.pxd @@ -0,0 +1,9 @@ +""" +psycopg_c.pq cython module. + +This file is necessary to allow c-importing pxd files from this directory. +""" + +# Copyright (C) 2020 The Psycopg Team + +from psycopg_c.pq cimport libpq diff --git a/psycopg_c/psycopg_c/pq/conninfo.pyx b/psycopg_c/psycopg_c/pq/conninfo.pyx new file mode 100644 index 0000000..3443de1 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/conninfo.pyx @@ -0,0 +1,61 @@ +""" +psycopg_c.pq.Conninfo object implementation. +""" + +# Copyright (C) 2020 The Psycopg Team + +from psycopg.pq.misc import ConninfoOption + + +class Conninfo: + @classmethod + def get_defaults(cls) -> List[ConninfoOption]: + cdef libpq.PQconninfoOption *opts = libpq.PQconndefaults() + if opts is NULL : + raise MemoryError("couldn't allocate connection defaults") + rv = _options_from_array(opts) + libpq.PQconninfoFree(opts) + return rv + + @classmethod + def parse(cls, const char *conninfo) -> List[ConninfoOption]: + cdef char *errmsg = NULL + cdef libpq.PQconninfoOption *opts = libpq.PQconninfoParse(conninfo, &errmsg) + if opts is NULL: + if errmsg is NULL: + raise MemoryError("couldn't allocate on conninfo parse") + else: + exc = e.OperationalError(errmsg.decode("utf8", "replace")) + libpq.PQfreemem(errmsg) + raise exc + + rv = _options_from_array(opts) + libpq.PQconninfoFree(opts) + return rv + + def __repr__(self): + return f"<{type(self).__name__} ({self.keyword.decode('ascii')})>" + + +cdef _options_from_array(libpq.PQconninfoOption *opts): + rv = [] + cdef int i = 0 + cdef libpq.PQconninfoOption* opt + while True: + opt = opts + i + if opt.keyword is NULL: + break + rv.append( + ConninfoOption( + keyword=opt.keyword, + envvar=opt.envvar if opt.envvar is not NULL else None, + compiled=opt.compiled if opt.compiled is not NULL else None, + val=opt.val if opt.val is not NULL else None, + label=opt.label if opt.label is not NULL else None, + dispchar=opt.dispchar if opt.dispchar is not NULL else None, + dispsize=opt.dispsize, + ) + ) + i += 1 + + return rv diff --git a/psycopg_c/psycopg_c/pq/escaping.pyx b/psycopg_c/psycopg_c/pq/escaping.pyx new file mode 100644 index 0000000..f0a44d3 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/escaping.pyx @@ -0,0 +1,132 @@ +""" +psycopg_c.pq.Escaping object implementation. 
+""" + +# Copyright (C) 2020 The Psycopg Team + +from libc.string cimport strlen +from cpython.mem cimport PyMem_Malloc, PyMem_Free + + +cdef class Escaping: + def __init__(self, PGconn conn = None): + self.conn = conn + + cpdef escape_literal(self, data): + cdef char *out + cdef char *ptr + cdef Py_ssize_t length + + if self.conn is None: + raise e.OperationalError("escape_literal failed: no connection provided") + if self.conn._pgconn_ptr is NULL: + raise e.OperationalError("the connection is closed") + + _buffer_as_string_and_size(data, &ptr, &length) + + out = libpq.PQescapeLiteral(self.conn._pgconn_ptr, ptr, length) + if out is NULL: + raise e.OperationalError( + f"escape_literal failed: {error_message(self.conn)}" + ) + + rv = out[:strlen(out)] + libpq.PQfreemem(out) + return rv + + cpdef escape_identifier(self, data): + cdef char *out + cdef char *ptr + cdef Py_ssize_t length + + _buffer_as_string_and_size(data, &ptr, &length) + + if self.conn is None: + raise e.OperationalError("escape_identifier failed: no connection provided") + if self.conn._pgconn_ptr is NULL: + raise e.OperationalError("the connection is closed") + + out = libpq.PQescapeIdentifier(self.conn._pgconn_ptr, ptr, length) + if out is NULL: + raise e.OperationalError( + f"escape_identifier failed: {error_message(self.conn)}" + ) + + rv = out[:strlen(out)] + libpq.PQfreemem(out) + return rv + + cpdef escape_string(self, data): + cdef int error + cdef size_t len_out + cdef char *ptr + cdef char *buf_out + cdef Py_ssize_t length + + _buffer_as_string_and_size(data, &ptr, &length) + + if self.conn is not None: + if self.conn._pgconn_ptr is NULL: + raise e.OperationalError("the connection is closed") + + buf_out = <char *>PyMem_Malloc(length * 2 + 1) + len_out = libpq.PQescapeStringConn( + self.conn._pgconn_ptr, buf_out, ptr, length, &error + ) + if error: + PyMem_Free(buf_out) + raise e.OperationalError( + f"escape_string failed: {error_message(self.conn)}" + ) + + else: + buf_out = <char *>PyMem_Malloc(length * 2 + 1) + len_out = libpq.PQescapeString(buf_out, ptr, length) + + rv = buf_out[:len_out] + PyMem_Free(buf_out) + return rv + + cpdef escape_bytea(self, data): + cdef size_t len_out + cdef unsigned char *out + cdef char *ptr + cdef Py_ssize_t length + + if self.conn is not None and self.conn._pgconn_ptr is NULL: + raise e.OperationalError("the connection is closed") + + _buffer_as_string_and_size(data, &ptr, &length) + + if self.conn is not None: + out = libpq.PQescapeByteaConn( + self.conn._pgconn_ptr, <unsigned char *>ptr, length, &len_out) + else: + out = libpq.PQescapeBytea(<unsigned char *>ptr, length, &len_out) + + if out is NULL: + raise MemoryError( + f"couldn't allocate for escape_bytea of {len(data)} bytes" + ) + + rv = out[:len_out - 1] # out includes final 0 + libpq.PQfreemem(out) + return rv + + cpdef unescape_bytea(self, const unsigned char *data): + # not needed, but let's keep it symmetric with the escaping: + # if a connection is passed in, it must be valid. 
+ if self.conn is not None: + if self.conn._pgconn_ptr is NULL: + raise e.OperationalError("the connection is closed") + + cdef size_t len_out + cdef unsigned char *out = libpq.PQunescapeBytea(data, &len_out) + if out is NULL: + raise MemoryError( + f"couldn't allocate for unescape_bytea of {len(data)} bytes" + ) + + rv = out[:len_out] + libpq.PQfreemem(out) + return rv diff --git a/psycopg_c/psycopg_c/pq/libpq.pxd b/psycopg_c/psycopg_c/pq/libpq.pxd new file mode 100644 index 0000000..5e05e40 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/libpq.pxd @@ -0,0 +1,321 @@ +""" +Libpq header definition for the cython psycopg.pq implementation. +""" + +# Copyright (C) 2020 The Psycopg Team + +cdef extern from "stdio.h": + + ctypedef struct FILE: + pass + +cdef extern from "pg_config.h": + + int PG_VERSION_NUM + + +cdef extern from "libpq-fe.h": + + # structures and types + + ctypedef unsigned int Oid + + ctypedef struct PGconn: + pass + + ctypedef struct PGresult: + pass + + ctypedef struct PQconninfoOption: + char *keyword + char *envvar + char *compiled + char *val + char *label + char *dispchar + int dispsize + + ctypedef struct PGnotify: + char *relname + int be_pid + char *extra + + ctypedef struct PGcancel: + pass + + ctypedef struct PGresAttDesc: + char *name + Oid tableid + int columnid + int format + Oid typid + int typlen + int atttypmod + + # enums + + ctypedef enum PostgresPollingStatusType: + PGRES_POLLING_FAILED = 0 + PGRES_POLLING_READING + PGRES_POLLING_WRITING + PGRES_POLLING_OK + PGRES_POLLING_ACTIVE + + + ctypedef enum PGPing: + PQPING_OK + PQPING_REJECT + PQPING_NO_RESPONSE + PQPING_NO_ATTEMPT + + ctypedef enum ConnStatusType: + CONNECTION_OK + CONNECTION_BAD + CONNECTION_STARTED + CONNECTION_MADE + CONNECTION_AWAITING_RESPONSE + CONNECTION_AUTH_OK + CONNECTION_SETENV + CONNECTION_SSL_STARTUP + CONNECTION_NEEDED + CONNECTION_CHECK_WRITABLE + CONNECTION_GSS_STARTUP + # CONNECTION_CHECK_TARGET PG 12 + + ctypedef enum PGTransactionStatusType: + PQTRANS_IDLE + PQTRANS_ACTIVE + PQTRANS_INTRANS + PQTRANS_INERROR + PQTRANS_UNKNOWN + + ctypedef enum ExecStatusType: + PGRES_EMPTY_QUERY = 0 + PGRES_COMMAND_OK + PGRES_TUPLES_OK + PGRES_COPY_OUT + PGRES_COPY_IN + PGRES_BAD_RESPONSE + PGRES_NONFATAL_ERROR + PGRES_FATAL_ERROR + PGRES_COPY_BOTH + PGRES_SINGLE_TUPLE + PGRES_PIPELINE_SYNC + PGRES_PIPELINE_ABORT + + # 33.1. Database Connection Control Functions + PGconn *PQconnectdb(const char *conninfo) + PGconn *PQconnectStart(const char *conninfo) + PostgresPollingStatusType PQconnectPoll(PGconn *conn) nogil + PQconninfoOption *PQconndefaults() + PQconninfoOption *PQconninfo(PGconn *conn) + PQconninfoOption *PQconninfoParse(const char *conninfo, char **errmsg) + void PQfinish(PGconn *conn) + void PQreset(PGconn *conn) + int PQresetStart(PGconn *conn) + PostgresPollingStatusType PQresetPoll(PGconn *conn) + PGPing PQping(const char *conninfo) + + # 33.2. 
Connection Status Functions + char *PQdb(const PGconn *conn) + char *PQuser(const PGconn *conn) + char *PQpass(const PGconn *conn) + char *PQhost(const PGconn *conn) + char *PQhostaddr(const PGconn *conn) + char *PQport(const PGconn *conn) + char *PQtty(const PGconn *conn) + char *PQoptions(const PGconn *conn) + ConnStatusType PQstatus(const PGconn *conn) + PGTransactionStatusType PQtransactionStatus(const PGconn *conn) + const char *PQparameterStatus(const PGconn *conn, const char *paramName) + int PQprotocolVersion(const PGconn *conn) + int PQserverVersion(const PGconn *conn) + char *PQerrorMessage(const PGconn *conn) + int PQsocket(const PGconn *conn) nogil + int PQbackendPID(const PGconn *conn) + int PQconnectionNeedsPassword(const PGconn *conn) + int PQconnectionUsedPassword(const PGconn *conn) + int PQsslInUse(PGconn *conn) # TODO: const in PG 12 docs - verify/report + # TODO: PQsslAttribute, PQsslAttributeNames, PQsslStruct, PQgetssl + + # 33.3. Command Execution Functions + PGresult *PQexec(PGconn *conn, const char *command) nogil + PGresult *PQexecParams(PGconn *conn, + const char *command, + int nParams, + const Oid *paramTypes, + const char * const *paramValues, + const int *paramLengths, + const int *paramFormats, + int resultFormat) nogil + PGresult *PQprepare(PGconn *conn, + const char *stmtName, + const char *query, + int nParams, + const Oid *paramTypes) nogil + PGresult *PQexecPrepared(PGconn *conn, + const char *stmtName, + int nParams, + const char * const *paramValues, + const int *paramLengths, + const int *paramFormats, + int resultFormat) nogil + PGresult *PQdescribePrepared(PGconn *conn, const char *stmtName) nogil + PGresult *PQdescribePortal(PGconn *conn, const char *portalName) nogil + ExecStatusType PQresultStatus(const PGresult *res) nogil + # PQresStatus: not needed, we have pretty enums + char *PQresultErrorMessage(const PGresult *res) nogil + # TODO: PQresultVerboseErrorMessage + char *PQresultErrorField(const PGresult *res, int fieldcode) nogil + void PQclear(PGresult *res) nogil + + # 33.3.2. Retrieving Query Result Information + int PQntuples(const PGresult *res) + int PQnfields(const PGresult *res) + char *PQfname(const PGresult *res, int column_number) + int PQfnumber(const PGresult *res, const char *column_name) + Oid PQftable(const PGresult *res, int column_number) + int PQftablecol(const PGresult *res, int column_number) + int PQfformat(const PGresult *res, int column_number) + Oid PQftype(const PGresult *res, int column_number) + int PQfmod(const PGresult *res, int column_number) + int PQfsize(const PGresult *res, int column_number) + int PQbinaryTuples(const PGresult *res) + char *PQgetvalue(const PGresult *res, int row_number, int column_number) + int PQgetisnull(const PGresult *res, int row_number, int column_number) + int PQgetlength(const PGresult *res, int row_number, int column_number) + int PQnparams(const PGresult *res) + Oid PQparamtype(const PGresult *res, int param_number) + # PQprint: pretty useless + + # 33.3.3. Retrieving Other Result Information + char *PQcmdStatus(PGresult *res) + char *PQcmdTuples(PGresult *res) + Oid PQoidValue(const PGresult *res) + + # 33.3.4. 
Escaping Strings for Inclusion in SQL Commands + char *PQescapeIdentifier(PGconn *conn, const char *str, size_t length) + char *PQescapeLiteral(PGconn *conn, const char *str, size_t length) + size_t PQescapeStringConn(PGconn *conn, + char *to, const char *from_, size_t length, + int *error) + size_t PQescapeString(char *to, const char *from_, size_t length) + unsigned char *PQescapeByteaConn(PGconn *conn, + const unsigned char *src, + size_t from_length, + size_t *to_length) + unsigned char *PQescapeBytea(const unsigned char *src, + size_t from_length, + size_t *to_length) + unsigned char *PQunescapeBytea(const unsigned char *src, size_t *to_length) + + + # 33.4. Asynchronous Command Processing + int PQsendQuery(PGconn *conn, const char *command) nogil + int PQsendQueryParams(PGconn *conn, + const char *command, + int nParams, + const Oid *paramTypes, + const char * const *paramValues, + const int *paramLengths, + const int *paramFormats, + int resultFormat) nogil + int PQsendPrepare(PGconn *conn, + const char *stmtName, + const char *query, + int nParams, + const Oid *paramTypes) nogil + int PQsendQueryPrepared(PGconn *conn, + const char *stmtName, + int nParams, + const char * const *paramValues, + const int *paramLengths, + const int *paramFormats, + int resultFormat) nogil + int PQsendDescribePrepared(PGconn *conn, const char *stmtName) nogil + int PQsendDescribePortal(PGconn *conn, const char *portalName) nogil + PGresult *PQgetResult(PGconn *conn) nogil + int PQconsumeInput(PGconn *conn) nogil + int PQisBusy(PGconn *conn) nogil + int PQsetnonblocking(PGconn *conn, int arg) nogil + int PQisnonblocking(const PGconn *conn) + int PQflush(PGconn *conn) nogil + + # 33.5. Retrieving Query Results Row-by-Row + int PQsetSingleRowMode(PGconn *conn) + + # 33.6. Canceling Queries in Progress + PGcancel *PQgetCancel(PGconn *conn) + void PQfreeCancel(PGcancel *cancel) + int PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) + + # 33.8. Asynchronous Notification + PGnotify *PQnotifies(PGconn *conn) nogil + + # 33.9. Functions Associated with the COPY Command + int PQputCopyData(PGconn *conn, const char *buffer, int nbytes) nogil + int PQputCopyEnd(PGconn *conn, const char *errormsg) nogil + int PQgetCopyData(PGconn *conn, char **buffer, int async) nogil + + # 33.10. Control Functions + void PQtrace(PGconn *conn, FILE *stream); + void PQsetTraceFlags(PGconn *conn, int flags); + void PQuntrace(PGconn *conn); + + # 33.11. Miscellaneous Functions + void PQfreemem(void *ptr) nogil + void PQconninfoFree(PQconninfoOption *connOptions) + char *PQencryptPasswordConn( + PGconn *conn, const char *passwd, const char *user, const char *algorithm); + PGresult *PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status) + int PQsetResultAttrs(PGresult *res, int numAttributes, PGresAttDesc *attDescs) + int PQlibVersion() + + # 33.12. Notice Processing + ctypedef void (*PQnoticeReceiver)(void *arg, const PGresult *res) + PQnoticeReceiver PQsetNoticeReceiver( + PGconn *conn, PQnoticeReceiver prog, void *arg) + + # 33.18. 
SSL Support + void PQinitOpenSSL(int do_ssl, int do_crypto) + + # 34.5 Pipeline Mode + + ctypedef enum PGpipelineStatus: + PQ_PIPELINE_OFF + PQ_PIPELINE_ON + PQ_PIPELINE_ABORTED + + PGpipelineStatus PQpipelineStatus(const PGconn *conn) + int PQenterPipelineMode(PGconn *conn) + int PQexitPipelineMode(PGconn *conn) + int PQpipelineSync(PGconn *conn) + int PQsendFlushRequest(PGconn *conn) + +cdef extern from *: + """ +/* Hack to allow the use of old libpq versions */ +#if PG_VERSION_NUM < 100000 +#define PQencryptPasswordConn(conn, passwd, user, algorithm) NULL +#endif + +#if PG_VERSION_NUM < 120000 +#define PQhostaddr(conn) NULL +#endif + +#if PG_VERSION_NUM < 140000 +#define PGRES_PIPELINE_SYNC 10 +#define PGRES_PIPELINE_ABORTED 11 +typedef enum { + PQ_PIPELINE_OFF, + PQ_PIPELINE_ON, + PQ_PIPELINE_ABORTED +} PGpipelineStatus; +#define PQpipelineStatus(conn) PQ_PIPELINE_OFF +#define PQenterPipelineMode(conn) 0 +#define PQexitPipelineMode(conn) 1 +#define PQpipelineSync(conn) 0 +#define PQsendFlushRequest(conn) 0 +#define PQsetTraceFlags(conn, stream) do {} while (0) +#endif +""" diff --git a/psycopg_c/psycopg_c/pq/pgcancel.pyx b/psycopg_c/psycopg_c/pq/pgcancel.pyx new file mode 100644 index 0000000..b7cbb70 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/pgcancel.pyx @@ -0,0 +1,32 @@ +""" +psycopg_c.pq.PGcancel object implementation. +""" + +# Copyright (C) 2020 The Psycopg Team + + +cdef class PGcancel: + def __cinit__(self): + self.pgcancel_ptr = NULL + + @staticmethod + cdef PGcancel _from_ptr(libpq.PGcancel *ptr): + cdef PGcancel rv = PGcancel.__new__(PGcancel) + rv.pgcancel_ptr = ptr + return rv + + def __dealloc__(self) -> None: + self.free() + + def free(self) -> None: + if self.pgcancel_ptr is not NULL: + libpq.PQfreeCancel(self.pgcancel_ptr) + self.pgcancel_ptr = NULL + + def cancel(self) -> None: + cdef char buf[256] + cdef int res = libpq.PQcancel(self.pgcancel_ptr, buf, sizeof(buf)) + if not res: + raise e.OperationalError( + f"cancel failed: {buf.decode('utf8', 'ignore')}" + ) diff --git a/psycopg_c/psycopg_c/pq/pgconn.pyx b/psycopg_c/psycopg_c/pq/pgconn.pyx new file mode 100644 index 0000000..4a60530 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/pgconn.pyx @@ -0,0 +1,733 @@ +""" +psycopg_c.pq.PGconn object implementation. +""" + +# Copyright (C) 2020 The Psycopg Team + +cdef extern from * nogil: + """ +#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) + /* We don't need a real definition for this because Windows is not affected + * by the issue caused by closing the fds after fork. + */ + #define getpid() (0) +#else + #include <unistd.h> +#endif + """ + pid_t getpid() + +from libc.stdio cimport fdopen +from cpython.mem cimport PyMem_Malloc, PyMem_Free +from cpython.bytes cimport PyBytes_AsString +from cpython.memoryview cimport PyMemoryView_FromObject + +import sys + +from psycopg.pq import Format as PqFormat, Trace +from psycopg.pq.misc import PGnotify, connection_summary +from psycopg_c.pq cimport PQBuffer + + +cdef class PGconn: + @staticmethod + cdef PGconn _from_ptr(libpq.PGconn *ptr): + cdef PGconn rv = PGconn.__new__(PGconn) + rv._pgconn_ptr = ptr + + libpq.PQsetNoticeReceiver(ptr, notice_receiver, <void *>rv) + return rv + + def __cinit__(self): + self._pgconn_ptr = NULL + self._procpid = getpid() + + def __dealloc__(self): + # Close the connection only if it was created in this process, + # not if this object is being GC'd after fork. 
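+        # After fork() the child inherits the connection's socket: calling
+        # PQfinish() from the child would close the parent's connection
+        # under its feet, hence the pid check.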
+ if self._procpid == getpid(): + self.finish() + + def __repr__(self) -> str: + cls = f"{self.__class__.__module__}.{self.__class__.__qualname__}" + info = connection_summary(self) + return f"<{cls} {info} at 0x{id(self):x}>" + + @classmethod + def connect(cls, const char *conninfo) -> PGconn: + cdef libpq.PGconn* pgconn = libpq.PQconnectdb(conninfo) + if not pgconn: + raise MemoryError("couldn't allocate PGconn") + + return PGconn._from_ptr(pgconn) + + @classmethod + def connect_start(cls, const char *conninfo) -> PGconn: + cdef libpq.PGconn* pgconn = libpq.PQconnectStart(conninfo) + if not pgconn: + raise MemoryError("couldn't allocate PGconn") + + return PGconn._from_ptr(pgconn) + + def connect_poll(self) -> int: + return _call_int(self, <conn_int_f>libpq.PQconnectPoll) + + def finish(self) -> None: + if self._pgconn_ptr is not NULL: + libpq.PQfinish(self._pgconn_ptr) + self._pgconn_ptr = NULL + + @property + def pgconn_ptr(self) -> Optional[int]: + if self._pgconn_ptr: + return <long long><void *>self._pgconn_ptr + else: + return None + + @property + def info(self) -> List["ConninfoOption"]: + _ensure_pgconn(self) + cdef libpq.PQconninfoOption *opts = libpq.PQconninfo(self._pgconn_ptr) + if opts is NULL: + raise MemoryError("couldn't allocate connection info") + rv = _options_from_array(opts) + libpq.PQconninfoFree(opts) + return rv + + def reset(self) -> None: + _ensure_pgconn(self) + libpq.PQreset(self._pgconn_ptr) + + def reset_start(self) -> None: + if not libpq.PQresetStart(self._pgconn_ptr): + raise e.OperationalError("couldn't reset connection") + + def reset_poll(self) -> int: + return _call_int(self, <conn_int_f>libpq.PQresetPoll) + + @classmethod + def ping(self, const char *conninfo) -> int: + return libpq.PQping(conninfo) + + @property + def db(self) -> bytes: + return _call_bytes(self, libpq.PQdb) + + @property + def user(self) -> bytes: + return _call_bytes(self, libpq.PQuser) + + @property + def password(self) -> bytes: + return _call_bytes(self, libpq.PQpass) + + @property + def host(self) -> bytes: + return _call_bytes(self, libpq.PQhost) + + @property + def hostaddr(self) -> bytes: + if libpq.PG_VERSION_NUM < 120000: + raise e.NotSupportedError( + f"PQhostaddr requires libpq from PostgreSQL 12," + f" {libpq.PG_VERSION_NUM} available instead" + ) + + _ensure_pgconn(self) + cdef char *rv = libpq.PQhostaddr(self._pgconn_ptr) + assert rv is not NULL + return rv + + @property + def port(self) -> bytes: + return _call_bytes(self, libpq.PQport) + + @property + def tty(self) -> bytes: + return _call_bytes(self, libpq.PQtty) + + @property + def options(self) -> bytes: + return _call_bytes(self, libpq.PQoptions) + + @property + def status(self) -> int: + return libpq.PQstatus(self._pgconn_ptr) + + @property + def transaction_status(self) -> int: + return libpq.PQtransactionStatus(self._pgconn_ptr) + + def parameter_status(self, const char *name) -> Optional[bytes]: + _ensure_pgconn(self) + cdef const char *rv = libpq.PQparameterStatus(self._pgconn_ptr, name) + if rv is not NULL: + return rv + else: + return None + + @property + def error_message(self) -> bytes: + return libpq.PQerrorMessage(self._pgconn_ptr) + + @property + def protocol_version(self) -> int: + return _call_int(self, libpq.PQprotocolVersion) + + @property + def server_version(self) -> int: + return _call_int(self, libpq.PQserverVersion) + + @property + def socket(self) -> int: + rv = _call_int(self, libpq.PQsocket) + if rv == -1: + raise e.OperationalError("the connection is lost") + return rv + + @property + 
def backend_pid(self) -> int: + return _call_int(self, libpq.PQbackendPID) + + @property + def needs_password(self) -> bool: + return bool(libpq.PQconnectionNeedsPassword(self._pgconn_ptr)) + + @property + def used_password(self) -> bool: + return bool(libpq.PQconnectionUsedPassword(self._pgconn_ptr)) + + @property + def ssl_in_use(self) -> bool: + return bool(_call_int(self, <conn_int_f>libpq.PQsslInUse)) + + def exec_(self, const char *command) -> PGresult: + _ensure_pgconn(self) + cdef libpq.PGresult *pgresult + with nogil: + pgresult = libpq.PQexec(self._pgconn_ptr, command) + if pgresult is NULL: + raise MemoryError("couldn't allocate PGresult") + + return PGresult._from_ptr(pgresult) + + def send_query(self, const char *command) -> None: + _ensure_pgconn(self) + cdef int rv + with nogil: + rv = libpq.PQsendQuery(self._pgconn_ptr, command) + if not rv: + raise e.OperationalError(f"sending query failed: {error_message(self)}") + + def exec_params( + self, + const char *command, + param_values: Optional[Sequence[Optional[bytes]]], + param_types: Optional[Sequence[int]] = None, + param_formats: Optional[Sequence[int]] = None, + int result_format = PqFormat.TEXT, + ) -> PGresult: + _ensure_pgconn(self) + + cdef Py_ssize_t cnparams + cdef libpq.Oid *ctypes + cdef char *const *cvalues + cdef int *clengths + cdef int *cformats + cnparams, ctypes, cvalues, clengths, cformats = _query_params_args( + param_values, param_types, param_formats) + + cdef libpq.PGresult *pgresult + with nogil: + pgresult = libpq.PQexecParams( + self._pgconn_ptr, command, <int>cnparams, ctypes, + <const char *const *>cvalues, clengths, cformats, result_format) + _clear_query_params(ctypes, cvalues, clengths, cformats) + if pgresult is NULL: + raise MemoryError("couldn't allocate PGresult") + return PGresult._from_ptr(pgresult) + + def send_query_params( + self, + const char *command, + param_values: Optional[Sequence[Optional[bytes]]], + param_types: Optional[Sequence[int]] = None, + param_formats: Optional[Sequence[int]] = None, + int result_format = PqFormat.TEXT, + ) -> None: + _ensure_pgconn(self) + + cdef Py_ssize_t cnparams + cdef libpq.Oid *ctypes + cdef char *const *cvalues + cdef int *clengths + cdef int *cformats + cnparams, ctypes, cvalues, clengths, cformats = _query_params_args( + param_values, param_types, param_formats) + + cdef int rv + with nogil: + rv = libpq.PQsendQueryParams( + self._pgconn_ptr, command, <int>cnparams, ctypes, + <const char *const *>cvalues, clengths, cformats, result_format) + _clear_query_params(ctypes, cvalues, clengths, cformats) + if not rv: + raise e.OperationalError( + f"sending query and params failed: {error_message(self)}" + ) + + def send_prepare( + self, + const char *name, + const char *command, + param_types: Optional[Sequence[int]] = None, + ) -> None: + _ensure_pgconn(self) + + cdef int i + cdef Py_ssize_t nparams = len(param_types) if param_types else 0 + cdef libpq.Oid *atypes = NULL + if nparams: + atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid)) + for i in range(nparams): + atypes[i] = param_types[i] + + cdef int rv + with nogil: + rv = libpq.PQsendPrepare( + self._pgconn_ptr, name, command, <int>nparams, atypes + ) + PyMem_Free(atypes) + if not rv: + raise e.OperationalError( + f"sending query and params failed: {error_message(self)}" + ) + + def send_query_prepared( + self, + const char *name, + param_values: Optional[Sequence[Optional[bytes]]], + param_formats: Optional[Sequence[int]] = None, + int result_format = PqFormat.TEXT, + ) -> None: + 
_ensure_pgconn(self) + + cdef Py_ssize_t cnparams + cdef libpq.Oid *ctypes + cdef char *const *cvalues + cdef int *clengths + cdef int *cformats + cnparams, ctypes, cvalues, clengths, cformats = _query_params_args( + param_values, None, param_formats) + + cdef int rv + with nogil: + rv = libpq.PQsendQueryPrepared( + self._pgconn_ptr, name, <int>cnparams, <const char *const *>cvalues, + clengths, cformats, result_format) + _clear_query_params(ctypes, cvalues, clengths, cformats) + if not rv: + raise e.OperationalError( + f"sending prepared query failed: {error_message(self)}" + ) + + def prepare( + self, + const char *name, + const char *command, + param_types: Optional[Sequence[int]] = None, + ) -> PGresult: + _ensure_pgconn(self) + + cdef int i + cdef Py_ssize_t nparams = len(param_types) if param_types else 0 + cdef libpq.Oid *atypes = NULL + if nparams: + atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid)) + for i in range(nparams): + atypes[i] = param_types[i] + + cdef libpq.PGresult *rv + with nogil: + rv = libpq.PQprepare( + self._pgconn_ptr, name, command, <int>nparams, atypes) + PyMem_Free(atypes) + if rv is NULL: + raise MemoryError("couldn't allocate PGresult") + return PGresult._from_ptr(rv) + + def exec_prepared( + self, + const char *name, + param_values: Optional[Sequence[bytes]], + param_formats: Optional[Sequence[int]] = None, + int result_format = PqFormat.TEXT, + ) -> PGresult: + _ensure_pgconn(self) + + cdef Py_ssize_t cnparams + cdef libpq.Oid *ctypes + cdef char *const *cvalues + cdef int *clengths + cdef int *cformats + cnparams, ctypes, cvalues, clengths, cformats = _query_params_args( + param_values, None, param_formats) + + cdef libpq.PGresult *rv + with nogil: + rv = libpq.PQexecPrepared( + self._pgconn_ptr, name, <int>cnparams, + <const char *const *>cvalues, + clengths, cformats, result_format) + + _clear_query_params(ctypes, cvalues, clengths, cformats) + if rv is NULL: + raise MemoryError("couldn't allocate PGresult") + return PGresult._from_ptr(rv) + + def describe_prepared(self, const char *name) -> PGresult: + _ensure_pgconn(self) + cdef libpq.PGresult *rv = libpq.PQdescribePrepared(self._pgconn_ptr, name) + if rv is NULL: + raise MemoryError("couldn't allocate PGresult") + return PGresult._from_ptr(rv) + + def send_describe_prepared(self, const char *name) -> None: + _ensure_pgconn(self) + cdef int rv = libpq.PQsendDescribePrepared(self._pgconn_ptr, name) + if not rv: + raise e.OperationalError( + f"sending describe prepared failed: {error_message(self)}" + ) + + def describe_portal(self, const char *name) -> PGresult: + _ensure_pgconn(self) + cdef libpq.PGresult *rv = libpq.PQdescribePortal(self._pgconn_ptr, name) + if rv is NULL: + raise MemoryError("couldn't allocate PGresult") + return PGresult._from_ptr(rv) + + def send_describe_portal(self, const char *name) -> None: + _ensure_pgconn(self) + cdef int rv = libpq.PQsendDescribePortal(self._pgconn_ptr, name) + if not rv: + raise e.OperationalError( + f"sending describe prepared failed: {error_message(self)}" + ) + + def get_result(self) -> Optional["PGresult"]: + cdef libpq.PGresult *pgresult = libpq.PQgetResult(self._pgconn_ptr) + if pgresult is NULL: + return None + return PGresult._from_ptr(pgresult) + + def consume_input(self) -> None: + if 1 != libpq.PQconsumeInput(self._pgconn_ptr): + raise e.OperationalError(f"consuming input failed: {error_message(self)}") + + def is_busy(self) -> int: + cdef int rv + with nogil: + rv = libpq.PQisBusy(self._pgconn_ptr) + return rv + + @property + def 
nonblocking(self) -> int: + return libpq.PQisnonblocking(self._pgconn_ptr) + + @nonblocking.setter + def nonblocking(self, int arg) -> None: + if 0 > libpq.PQsetnonblocking(self._pgconn_ptr, arg): + raise e.OperationalError(f"setting nonblocking failed: {error_message(self)}") + + cpdef int flush(self) except -1: + if self._pgconn_ptr == NULL: + raise e.OperationalError(f"flushing failed: the connection is closed") + cdef int rv = libpq.PQflush(self._pgconn_ptr) + if rv < 0: + raise e.OperationalError(f"flushing failed: {error_message(self)}") + return rv + + def set_single_row_mode(self) -> None: + if not libpq.PQsetSingleRowMode(self._pgconn_ptr): + raise e.OperationalError("setting single row mode failed") + + def get_cancel(self) -> PGcancel: + cdef libpq.PGcancel *ptr = libpq.PQgetCancel(self._pgconn_ptr) + if not ptr: + raise e.OperationalError("couldn't create cancel object") + return PGcancel._from_ptr(ptr) + + cpdef object notifies(self): + cdef libpq.PGnotify *ptr + with nogil: + ptr = libpq.PQnotifies(self._pgconn_ptr) + if ptr: + ret = PGnotify(ptr.relname, ptr.be_pid, ptr.extra) + libpq.PQfreemem(ptr) + return ret + else: + return None + + def put_copy_data(self, buffer) -> int: + cdef int rv + cdef char *cbuffer + cdef Py_ssize_t length + + _buffer_as_string_and_size(buffer, &cbuffer, &length) + rv = libpq.PQputCopyData(self._pgconn_ptr, cbuffer, <int>length) + if rv < 0: + raise e.OperationalError(f"sending copy data failed: {error_message(self)}") + return rv + + def put_copy_end(self, error: Optional[bytes] = None) -> int: + cdef int rv + cdef const char *cerr = NULL + if error is not None: + cerr = PyBytes_AsString(error) + rv = libpq.PQputCopyEnd(self._pgconn_ptr, cerr) + if rv < 0: + raise e.OperationalError(f"sending copy end failed: {error_message(self)}") + return rv + + def get_copy_data(self, int async_) -> Tuple[int, memoryview]: + cdef char *buffer_ptr = NULL + cdef int nbytes + nbytes = libpq.PQgetCopyData(self._pgconn_ptr, &buffer_ptr, async_) + if nbytes == -2: + raise e.OperationalError(f"receiving copy data failed: {error_message(self)}") + if buffer_ptr is not NULL: + data = PyMemoryView_FromObject( + PQBuffer._from_buffer(<unsigned char *>buffer_ptr, nbytes)) + return nbytes, data + else: + return nbytes, b"" # won't parse it, doesn't really be memoryview + + def trace(self, fileno: int) -> None: + if sys.platform != "linux": + raise e.NotSupportedError("currently only supported on Linux") + stream = fdopen(fileno, b"w") + libpq.PQtrace(self._pgconn_ptr, stream) + + def set_trace_flags(self, flags: Trace) -> None: + if libpq.PG_VERSION_NUM < 140000: + raise e.NotSupportedError( + f"PQsetTraceFlags requires libpq from PostgreSQL 14," + f" {libpq.PG_VERSION_NUM} available instead" + ) + libpq.PQsetTraceFlags(self._pgconn_ptr, flags) + + def untrace(self) -> None: + libpq.PQuntrace(self._pgconn_ptr) + + def encrypt_password( + self, const char *passwd, const char *user, algorithm = None + ) -> bytes: + if libpq.PG_VERSION_NUM < 100000: + raise e.NotSupportedError( + f"PQencryptPasswordConn requires libpq from PostgreSQL 10," + f" {libpq.PG_VERSION_NUM} available instead" + ) + + cdef char *out + cdef const char *calgo = NULL + if algorithm: + calgo = algorithm + out = libpq.PQencryptPasswordConn(self._pgconn_ptr, passwd, user, calgo) + if not out: + raise e.OperationalError( + f"password encryption failed: {error_message(self)}" + ) + + rv = bytes(out) + libpq.PQfreemem(out) + return rv + + def make_empty_result(self, int exec_status) -> PGresult: + cdef 
libpq.PGresult *rv = libpq.PQmakeEmptyPGresult( + self._pgconn_ptr, <libpq.ExecStatusType>exec_status) + if not rv: + raise MemoryError("couldn't allocate empty PGresult") + return PGresult._from_ptr(rv) + + @property + def pipeline_status(self) -> int: + """The current pipeline mode status. + + For libpq < 14.0, always return 0 (PQ_PIPELINE_OFF). + """ + if libpq.PG_VERSION_NUM < 140000: + return libpq.PQ_PIPELINE_OFF + cdef int status = libpq.PQpipelineStatus(self._pgconn_ptr) + return status + + def enter_pipeline_mode(self) -> None: + """Enter pipeline mode. + + :raises ~e.OperationalError: in case of failure to enter the pipeline + mode. + """ + if libpq.PG_VERSION_NUM < 140000: + raise e.NotSupportedError( + f"PQenterPipelineMode requires libpq from PostgreSQL 14," + f" {libpq.PG_VERSION_NUM} available instead" + ) + if libpq.PQenterPipelineMode(self._pgconn_ptr) != 1: + raise e.OperationalError("failed to enter pipeline mode") + + def exit_pipeline_mode(self) -> None: + """Exit pipeline mode. + + :raises ~e.OperationalError: in case of failure to exit the pipeline + mode. + """ + if libpq.PG_VERSION_NUM < 140000: + raise e.NotSupportedError( + f"PQexitPipelineMode requires libpq from PostgreSQL 14," + f" {libpq.PG_VERSION_NUM} available instead" + ) + if libpq.PQexitPipelineMode(self._pgconn_ptr) != 1: + raise e.OperationalError(error_message(self)) + + def pipeline_sync(self) -> None: + """Mark a synchronization point in a pipeline. + + :raises ~e.OperationalError: if the connection is not in pipeline mode + or if sync failed. + """ + if libpq.PG_VERSION_NUM < 140000: + raise e.NotSupportedError( + f"PQpipelineSync requires libpq from PostgreSQL 14," + f" {libpq.PG_VERSION_NUM} available instead" + ) + rv = libpq.PQpipelineSync(self._pgconn_ptr) + if rv == 0: + raise e.OperationalError("connection not in pipeline mode") + if rv != 1: + raise e.OperationalError("failed to sync pipeline") + + def send_flush_request(self) -> None: + """Sends a request for the server to flush its output buffer. + + :raises ~e.OperationalError: if the flush request failed. + """ + if libpq.PG_VERSION_NUM < 140000: + raise e.NotSupportedError( + f"PQsendFlushRequest requires libpq from PostgreSQL 14," + f" {libpq.PG_VERSION_NUM} available instead" + ) + cdef int rv = libpq.PQsendFlushRequest(self._pgconn_ptr) + if rv == 0: + raise e.OperationalError(f"flush request failed: {error_message(self)}") + + +cdef int _ensure_pgconn(PGconn pgconn) except 0: + if pgconn._pgconn_ptr is not NULL: + return 1 + + raise e.OperationalError("the connection is closed") + + +cdef char *_call_bytes(PGconn pgconn, conn_bytes_f func) except NULL: + """ + Call one of the pgconn libpq functions returning a bytes pointer. + """ + if not _ensure_pgconn(pgconn): + return NULL + cdef char *rv = func(pgconn._pgconn_ptr) + assert rv is not NULL + return rv + + +cdef int _call_int(PGconn pgconn, conn_int_f func) except -2: + """ + Call one of the pgconn libpq functions returning an int. 
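+
+    -2 is used as the error sentinel because -1 can be a legitimate return
+    value (e.g. PQsocket() on a closed connection).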
+ """ + if not _ensure_pgconn(pgconn): + return -2 + return func(pgconn._pgconn_ptr) + + +cdef void notice_receiver(void *arg, const libpq.PGresult *res_ptr) with gil: + cdef PGconn pgconn = <object>arg + if pgconn.notice_handler is None: + return + + cdef PGresult res = PGresult._from_ptr(<libpq.PGresult *>res_ptr) + try: + pgconn.notice_handler(res) + except Exception as e: + logger.exception("error in notice receiver: %s", e) + finally: + res._pgresult_ptr = NULL # avoid destroying the pgresult_ptr + + +cdef (Py_ssize_t, libpq.Oid *, char * const*, int *, int *) _query_params_args( + list param_values: Optional[Sequence[Optional[bytes]]], + param_types: Optional[Sequence[int]], + list param_formats: Optional[Sequence[int]], +) except *: + cdef int i + + # the PostgresQuery converts the param_types to tuple, so this operation + # is most often no-op + cdef tuple tparam_types + if param_types is not None and not isinstance(param_types, tuple): + tparam_types = tuple(param_types) + else: + tparam_types = param_types + + cdef Py_ssize_t nparams = len(param_values) if param_values else 0 + if tparam_types is not None and len(tparam_types) != nparams: + raise ValueError( + "got %d param_values but %d param_types" + % (nparams, len(tparam_types)) + ) + if param_formats is not None and len(param_formats) != nparams: + raise ValueError( + "got %d param_values but %d param_formats" + % (nparams, len(param_formats)) + ) + + cdef char **aparams = NULL + cdef int *alenghts = NULL + cdef char *ptr + cdef Py_ssize_t length + + if nparams: + aparams = <char **>PyMem_Malloc(nparams * sizeof(char *)) + alenghts = <int *>PyMem_Malloc(nparams * sizeof(int)) + for i in range(nparams): + obj = param_values[i] + if obj is None: + aparams[i] = NULL + alenghts[i] = 0 + else: + # TODO: it is a leak if this fails (but it should only fail + # on internal error, e.g. if obj is not a buffer) + _buffer_as_string_and_size(obj, &ptr, &length) + aparams[i] = ptr + alenghts[i] = <int>length + + cdef libpq.Oid *atypes = NULL + if tparam_types: + atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid)) + for i in range(nparams): + atypes[i] = tparam_types[i] + + cdef int *aformats = NULL + if param_formats is not None: + aformats = <int *>PyMem_Malloc(nparams * sizeof(int *)) + for i in range(nparams): + aformats[i] = param_formats[i] + + return (nparams, atypes, aparams, alenghts, aformats) + + +cdef void _clear_query_params( + libpq.Oid *ctypes, char *const *cvalues, int *clenghst, int *cformats +): + PyMem_Free(ctypes) + PyMem_Free(<char **>cvalues) + PyMem_Free(clenghst) + PyMem_Free(cformats) diff --git a/psycopg_c/psycopg_c/pq/pgresult.pyx b/psycopg_c/psycopg_c/pq/pgresult.pyx new file mode 100644 index 0000000..6df42e8 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/pgresult.pyx @@ -0,0 +1,157 @@ +""" +psycopg_c.pq.PGresult object implementation. 
+""" + +# Copyright (C) 2020 The Psycopg Team + +cimport cython +from cpython.mem cimport PyMem_Malloc, PyMem_Free + +from psycopg.pq.misc import PGresAttDesc +from psycopg.pq._enums import ExecStatus + + +@cython.freelist(8) +cdef class PGresult: + def __cinit__(self): + self._pgresult_ptr = NULL + + @staticmethod + cdef PGresult _from_ptr(libpq.PGresult *ptr): + cdef PGresult rv = PGresult.__new__(PGresult) + rv._pgresult_ptr = ptr + return rv + + def __dealloc__(self) -> None: + self.clear() + + def __repr__(self) -> str: + cls = f"{self.__class__.__module__}.{self.__class__.__qualname__}" + status = ExecStatus(self.status) + return f"<{cls} [{status.name}] at 0x{id(self):x}>" + + def clear(self) -> None: + if self._pgresult_ptr is not NULL: + libpq.PQclear(self._pgresult_ptr) + self._pgresult_ptr = NULL + + @property + def pgresult_ptr(self) -> Optional[int]: + if self._pgresult_ptr: + return <long long><void *>self._pgresult_ptr + else: + return None + + @property + def status(self) -> int: + return libpq.PQresultStatus(self._pgresult_ptr) + + @property + def error_message(self) -> bytes: + return libpq.PQresultErrorMessage(self._pgresult_ptr) + + def error_field(self, int fieldcode) -> Optional[bytes]: + cdef char * rv = libpq.PQresultErrorField(self._pgresult_ptr, fieldcode) + if rv is not NULL: + return rv + else: + return None + + @property + def ntuples(self) -> int: + return libpq.PQntuples(self._pgresult_ptr) + + @property + def nfields(self) -> int: + return libpq.PQnfields(self._pgresult_ptr) + + def fname(self, int column_number) -> Optional[bytes]: + cdef char *rv = libpq.PQfname(self._pgresult_ptr, column_number) + if rv is not NULL: + return rv + else: + return None + + def ftable(self, int column_number) -> int: + return libpq.PQftable(self._pgresult_ptr, column_number) + + def ftablecol(self, int column_number) -> int: + return libpq.PQftablecol(self._pgresult_ptr, column_number) + + def fformat(self, int column_number) -> int: + return libpq.PQfformat(self._pgresult_ptr, column_number) + + def ftype(self, int column_number) -> int: + return libpq.PQftype(self._pgresult_ptr, column_number) + + def fmod(self, int column_number) -> int: + return libpq.PQfmod(self._pgresult_ptr, column_number) + + def fsize(self, int column_number) -> int: + return libpq.PQfsize(self._pgresult_ptr, column_number) + + @property + def binary_tuples(self) -> int: + return libpq.PQbinaryTuples(self._pgresult_ptr) + + def get_value(self, int row_number, int column_number) -> Optional[bytes]: + cdef int crow = row_number + cdef int ccol = column_number + cdef int length = libpq.PQgetlength(self._pgresult_ptr, crow, ccol) + cdef char *v + if length: + v = libpq.PQgetvalue(self._pgresult_ptr, crow, ccol) + # TODO: avoid copy + return v[:length] + else: + if libpq.PQgetisnull(self._pgresult_ptr, crow, ccol): + return None + else: + return b"" + + @property + def nparams(self) -> int: + return libpq.PQnparams(self._pgresult_ptr) + + def param_type(self, int param_number) -> int: + return libpq.PQparamtype(self._pgresult_ptr, param_number) + + @property + def command_status(self) -> Optional[bytes]: + cdef char *rv = libpq.PQcmdStatus(self._pgresult_ptr) + if rv is not NULL: + return rv + else: + return None + + @property + def command_tuples(self) -> Optional[int]: + cdef char *rv = libpq.PQcmdTuples(self._pgresult_ptr) + if rv is NULL: + return None + cdef bytes brv = rv + return int(brv) if brv else None + + @property + def oid_value(self) -> int: + return libpq.PQoidValue(self._pgresult_ptr) + + 
def set_attributes(self, descriptions: List[PGresAttDesc]): + cdef Py_ssize_t num = len(descriptions) + cdef libpq.PGresAttDesc *attrs = <libpq.PGresAttDesc *>PyMem_Malloc( + num * sizeof(libpq.PGresAttDesc)) + + for i in range(num): + descr = descriptions[i] + attrs[i].name = descr.name + attrs[i].tableid = descr.tableid + attrs[i].columnid = descr.columnid + attrs[i].format = descr.format + attrs[i].typid = descr.typid + attrs[i].typlen = descr.typlen + attrs[i].atttypmod = descr.atttypmod + + cdef int res = libpq.PQsetResultAttrs(self._pgresult_ptr, <int>num, attrs) + PyMem_Free(attrs) + if (res == 0): + raise e.OperationalError("PQsetResultAttrs failed") diff --git a/psycopg_c/psycopg_c/pq/pqbuffer.pyx b/psycopg_c/psycopg_c/pq/pqbuffer.pyx new file mode 100644 index 0000000..eb5d648 --- /dev/null +++ b/psycopg_c/psycopg_c/pq/pqbuffer.pyx @@ -0,0 +1,111 @@ +""" +PQbuffer object implementation. +""" + +# Copyright (C) 2020 The Psycopg Team + +cimport cython +from cpython.bytes cimport PyBytes_AsStringAndSize +from cpython.buffer cimport PyObject_CheckBuffer, PyBUF_SIMPLE +from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release + + +@cython.freelist(32) +cdef class PQBuffer: + """ + Wrap a chunk of memory allocated by the libpq and expose it as memoryview. + """ + @staticmethod + cdef PQBuffer _from_buffer(unsigned char *buf, Py_ssize_t length): + cdef PQBuffer rv = PQBuffer.__new__(PQBuffer) + rv.buf = buf + rv.len = length + return rv + + def __cinit__(self): + self.buf = NULL + self.len = 0 + + def __dealloc__(self): + if self.buf: + libpq.PQfreemem(self.buf) + + def __repr__(self): + return ( + f"{self.__class__.__module__}.{self.__class__.__qualname__}" + f"({bytes(self)})" + ) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + buffer.buf = self.buf + buffer.obj = self + buffer.len = self.len + buffer.itemsize = sizeof(unsigned char) + buffer.readonly = 1 + buffer.ndim = 1 + buffer.format = NULL # unsigned char + buffer.shape = &self.len + buffer.strides = NULL + buffer.suboffsets = NULL + buffer.internal = NULL + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + +@cython.freelist(32) +cdef class ViewBuffer: + """ + Wrap a chunk of memory owned by a different object. 
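+
+    Keeping a reference to the owner in 'obj' guarantees that the
+    underlying memory outlives every view handed out on it.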
+ """ + @staticmethod + cdef ViewBuffer _from_buffer( + object obj, unsigned char *buf, Py_ssize_t length + ): + cdef ViewBuffer rv = ViewBuffer.__new__(ViewBuffer) + rv.obj = obj + rv.buf = buf + rv.len = length + return rv + + def __cinit__(self): + self.buf = NULL + self.len = 0 + + def __repr__(self): + return ( + f"{self.__class__.__module__}.{self.__class__.__qualname__}" + f"({bytes(self)})" + ) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + buffer.buf = self.buf + buffer.obj = self + buffer.len = self.len + buffer.itemsize = sizeof(unsigned char) + buffer.readonly = 1 + buffer.ndim = 1 + buffer.format = NULL # unsigned char + buffer.shape = &self.len + buffer.strides = NULL + buffer.suboffsets = NULL + buffer.internal = NULL + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + +cdef int _buffer_as_string_and_size( + data: "Buffer", char **ptr, Py_ssize_t *length +) except -1: + cdef Py_buffer buf + + if isinstance(data, bytes): + PyBytes_AsStringAndSize(data, ptr, length) + elif PyObject_CheckBuffer(data): + PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) + ptr[0] = <char *>buf.buf + length[0] = buf.len + PyBuffer_Release(&buf) + else: + raise TypeError(f"bytes or buffer expected, got {type(data)}") diff --git a/psycopg_c/psycopg_c/py.typed b/psycopg_c/psycopg_c/py.typed new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/psycopg_c/psycopg_c/py.typed diff --git a/psycopg_c/psycopg_c/types/array.pyx b/psycopg_c/psycopg_c/types/array.pyx new file mode 100644 index 0000000..9abaef9 --- /dev/null +++ b/psycopg_c/psycopg_c/types/array.pyx @@ -0,0 +1,276 @@ +""" +C optimized functions to manipulate arrays +""" + +# Copyright (C) 2022 The Psycopg Team + +import cython + +from libc.stdint cimport int32_t, uint32_t +from libc.string cimport memset, strchr +from cpython.mem cimport PyMem_Realloc, PyMem_Free +from cpython.ref cimport Py_INCREF +from cpython.list cimport PyList_New,PyList_Append, PyList_GetSlice +from cpython.list cimport PyList_GET_ITEM, PyList_SET_ITEM, PyList_GET_SIZE +from cpython.object cimport PyObject + +from psycopg_c.pq cimport _buffer_as_string_and_size +from psycopg_c.pq.libpq cimport Oid +from psycopg_c._psycopg cimport endian + +from psycopg import errors as e + +cdef extern from *: + """ +/* Defined in PostgreSQL in src/include/utils/array.h */ +#define MAXDIM 6 + """ + const int MAXDIM + + +cdef class ArrayLoader(_CRecursiveLoader): + + format = PQ_TEXT + base_oid = 0 + delimiter = b"," + + cdef PyObject *row_loader + cdef char cdelim + + # A memory area which used to unescape elements. + # Keep it here to avoid a malloc per element and to set up exceptions + # to make sure to free it on error. 
+    cdef char *scratch
+    cdef size_t sclen
+
+    cdef object cload(self, const char *data, size_t length):
+        if self.cdelim == b"\x00":
+            self.row_loader = self._tx._c_get_loader(
+                <PyObject *>self.base_oid, <PyObject *>PQ_TEXT)
+            self.cdelim = self.delimiter[0]
+
+        return _array_load_text(
+            data, length, self.row_loader, self.cdelim,
+            &(self.scratch), &(self.sclen))
+
+    def __dealloc__(self):
+        PyMem_Free(self.scratch)
+
+
+@cython.final
+cdef class ArrayBinaryLoader(_CRecursiveLoader):
+
+    format = PQ_BINARY
+
+    cdef PyObject *row_loader
+
+    cdef object cload(self, const char *data, size_t length):
+        rv = _array_load_binary(data, length, self._tx, &(self.row_loader))
+        return rv
+
+
+cdef object _array_load_text(
+    const char *buf, size_t length, PyObject *row_loader, char cdelim,
+    char **scratch, size_t *sclen
+):
+    if length == 0:
+        raise e.DataError("malformed array: empty data")
+
+    cdef const char *end = buf + length
+
+    # Remove the dimensions information prefix (``[...]=``)
+    if buf[0] == b"[":
+        buf = strchr(buf + 1, b'=')
+        if buf == NULL:
+            raise e.DataError("malformed array: no '=' after dimension information")
+        buf += 1
+
+    # TODO: further optimization: pre-scan the array to find the array
+    # dimensions, so that we can preallocate the lists with the right size
+    # instead of calling append, which is the dominating operation
+
+    cdef list stack = []
+    cdef list a = []
+    rv = a
+    cdef PyObject *tmp
+
+    cdef CLoader cloader = None
+    cdef object pyload = None
+    if (<RowLoader>row_loader).cloader is not None:
+        cloader = (<RowLoader>row_loader).cloader
+    else:
+        pyload = (<RowLoader>row_loader).loadfunc
+
+    while buf < end:
+        if buf[0] == b'{':
+            if stack:
+                tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
+                PyList_Append(<object>tmp, a)
+            PyList_Append(stack, a)
+            a = []
+            buf += 1
+
+        elif buf[0] == b'}':
+            if not stack:
+                raise e.DataError("malformed array: unexpected '}'")
+            rv = stack.pop()
+            buf += 1
+
+        elif buf[0] == cdelim:
+            buf += 1
+
+        else:
+            v = _parse_token(
+                &buf, end, cdelim, scratch, sclen, cloader, pyload)
+            if not stack:
+                raise e.DataError("malformed array: missing initial '{'")
+            tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
+            PyList_Append(<object>tmp, v)
+
+    return rv
+
+
+cdef object _parse_token(
+    const char **bufptr, const char *bufend, char cdelim,
+    char **scratch, size_t *sclen, CLoader cloader, object load
+):
+    cdef const char *start = bufptr[0]
+    cdef int has_quotes = start[0] == b'"'
+    cdef int quoted = has_quotes
+    cdef int num_escapes = 0
+    cdef int escaped = 0
+
+    if has_quotes:
+        start += 1
+    cdef const char *end = start
+
+    while end < bufend:
+        if (end[0] == cdelim or end[0] == b'}') and not quoted:
+            break
+        elif end[0] == b'\\' and not escaped:
+            num_escapes += 1
+            escaped = 1
+            end += 1
+            continue
+        elif end[0] == b'"' and not escaped:
+            quoted = 0
+        escaped = 0
+        end += 1
+    else:
+        raise e.DataError("malformed array: hit the end of the buffer")
+
+    # Return the new position for the buffer
+    bufptr[0] = end
+    if has_quotes:
+        end -= 1
+
+    cdef int length = (end - start)
+    if length == 4 and not has_quotes \
+            and start[0] == b'N' and start[1] == b'U' \
+            and start[2] == b'L' and start[3] == b'L':
+        return None
+
+    cdef const char *src
+    cdef char *tgt
+    cdef size_t unesclen
+
+    if not num_escapes:
+        if cloader is not None:
+            return cloader.cload(start, length)
+        else:
+            b = start[:length]
+            return load(b)
+
+    else:
+        unesclen = length - num_escapes + 1
+        if unesclen > sclen[0]:
+            scratch[0] = <char *>PyMem_Realloc(scratch[0],
unesclen) + sclen[0] = unesclen + + src = start + tgt = scratch[0] + while src < end: + if src[0] == b'\\': + src += 1 + tgt[0] = src[0] + src += 1 + tgt += 1 + + tgt[0] = b'\x00' + + if cloader is not None: + return cloader.cload(scratch[0], length - num_escapes) + else: + b = scratch[0][:length - num_escapes] + return load(b) + + +@cython.cdivision(True) +cdef object _array_load_binary( + const char *buf, size_t length, Transformer tx, PyObject **row_loader_ptr +): + # head is ndims, hasnull, elem oid + cdef uint32_t *buf32 = <uint32_t *>buf + cdef int ndims = endian.be32toh(buf32[0]) + + if ndims <= 0: + return [] + elif ndims > MAXDIM: + raise e.DataError( + r"unexpected number of dimensions %s exceeding the maximum allowed %s" + % (ndims, MAXDIM) + ) + + cdef object oid + if row_loader_ptr[0] == NULL: + oid = <Oid>endian.be32toh(buf32[2]) + row_loader_ptr[0] = tx._c_get_loader(<PyObject *>oid, <PyObject *>PQ_BINARY) + + cdef Py_ssize_t[MAXDIM] dims + cdef int i + for i in range(ndims): + # Every dimension is dim, lower bound + dims[i] = endian.be32toh(buf32[3 + 2 * i]) + + buf += (3 + 2 * ndims) * sizeof(uint32_t) + out = _array_load_binary_rec(ndims, dims, &buf, row_loader_ptr[0]) + return out + + +cdef object _array_load_binary_rec( + Py_ssize_t ndims, Py_ssize_t *dims, const char **bufptr, PyObject *row_loader +): + cdef const char *buf + cdef int i + cdef int32_t size + cdef object val + + cdef Py_ssize_t nelems = dims[0] + cdef list out = PyList_New(nelems) + + if ndims == 1: + buf = bufptr[0] + for i in range(nelems): + size = <int32_t>endian.be32toh((<uint32_t *>buf)[0]) + buf += sizeof(uint32_t) + if size == -1: + val = None + else: + if (<RowLoader>row_loader).cloader is not None: + val = (<RowLoader>row_loader).cloader.cload(buf, size) + else: + val = (<RowLoader>row_loader).loadfunc(buf[:size]) + buf += size + + Py_INCREF(val) + PyList_SET_ITEM(out, i, val) + + bufptr[0] = buf + + else: + for i in range(nelems): + val = _array_load_binary_rec(ndims - 1, dims + 1, bufptr, row_loader) + Py_INCREF(val) + PyList_SET_ITEM(out, i, val) + + return out diff --git a/psycopg_c/psycopg_c/types/bool.pyx b/psycopg_c/psycopg_c/types/bool.pyx new file mode 100644 index 0000000..86cf88e --- /dev/null +++ b/psycopg_c/psycopg_c/types/bool.pyx @@ -0,0 +1,78 @@ +""" +Cython adapters for boolean. 
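+
+Both the text format ('t'/'f') and the single-byte binary format are
+covered, for dumping and for loading.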
+""" + +# Copyright (C) 2020 The Psycopg Team + +cimport cython + + +@cython.final +cdef class BoolDumper(CDumper): + + format = PQ_TEXT + oid = oids.BOOL_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *buf = CDumper.ensure_size(rv, offset, 1) + + # Fast paths, just a pointer comparison + if obj is True: + buf[0] = b"t" + elif obj is False: + buf[0] = b"f" + elif obj: + buf[0] = b"t" + else: + buf[0] = b"f" + + return 1 + + def quote(self, obj: bool) -> bytes: + if obj is True: + return b"true" + elif obj is False: + return b"false" + else: + return b"true" if obj else b"false" + + +@cython.final +cdef class BoolBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.BOOL_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *buf = CDumper.ensure_size(rv, offset, 1) + + # Fast paths, just a pointer comparison + if obj is True: + buf[0] = b"\x01" + elif obj is False: + buf[0] = b"\x00" + elif obj: + buf[0] = b"\x01" + else: + buf[0] = b"\x00" + + return 1 + + +@cython.final +cdef class BoolLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + # this creates better C than `return data[0] == b't'` + return True if data[0] == b't' else False + + +@cython.final +cdef class BoolBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return True if data[0] else False diff --git a/psycopg_c/psycopg_c/types/datetime.pyx b/psycopg_c/psycopg_c/types/datetime.pyx new file mode 100644 index 0000000..51e7dcf --- /dev/null +++ b/psycopg_c/psycopg_c/types/datetime.pyx @@ -0,0 +1,1136 @@ +""" +Cython adapters for date/time types. +""" + +# Copyright (C) 2021 The Psycopg Team + +from libc.string cimport memset, strchr +from cpython cimport datetime as cdt +from cpython.dict cimport PyDict_GetItem +from cpython.object cimport PyObject, PyObject_CallFunctionObjArgs + +cdef extern from "Python.h": + const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) except NULL + object PyTimeZone_FromOffset(object offset) + +cdef extern from *: + """ +/* Multipliers from fraction of seconds to microseconds */ +static int _uspad[] = {0, 100000, 10000, 1000, 100, 10, 1}; + """ + cdef int *_uspad + +from datetime import date, time, timedelta, datetime, timezone + +from psycopg_c._psycopg cimport endian + +from psycopg import errors as e +from psycopg._compat import ZoneInfo + + +# Initialise the datetime C API +cdt.import_datetime() + +cdef enum: + ORDER_YMD = 0 + ORDER_DMY = 1 + ORDER_MDY = 2 + ORDER_PGDM = 3 + ORDER_PGMD = 4 + +cdef enum: + INTERVALSTYLE_OTHERS = 0 + INTERVALSTYLE_SQL_STANDARD = 1 + INTERVALSTYLE_POSTGRES = 2 + +cdef enum: + PG_DATE_EPOCH_DAYS = 730120 # date(2000, 1, 1).toordinal() + PY_DATE_MIN_DAYS = 1 # date.min.toordinal() + +cdef object date_toordinal = date.toordinal +cdef object date_fromordinal = date.fromordinal +cdef object datetime_astimezone = datetime.astimezone +cdef object time_utcoffset = time.utcoffset +cdef object timedelta_total_seconds = timedelta.total_seconds +cdef object timezone_utc = timezone.utc +cdef object pg_datetime_epoch = datetime(2000, 1, 1) +cdef object pg_datetimetz_epoch = datetime(2000, 1, 1, tzinfo=timezone.utc) + +cdef object _month_abbr = { + n: i + for i, n in enumerate( + b"Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(), 1 + ) +} + + +@cython.final +cdef class DateDumper(CDumper): + + format = PQ_TEXT + oid = oids.DATE_OID + + cdef Py_ssize_t 
cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size; + cdef const char *src + + # NOTE: whatever the PostgreSQL DateStyle input format (DMY, MDY, YMD) + # the YYYY-MM-DD is always understood correctly. + cdef str s = str(obj) + src = PyUnicode_AsUTF8AndSize(s, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +@cython.final +cdef class DateBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.DATE_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int32_t days = PyObject_CallFunctionObjArgs( + date_toordinal, <PyObject *>obj, NULL) + days -= PG_DATE_EPOCH_DAYS + cdef int32_t *buf = <int32_t *>CDumper.ensure_size( + rv, offset, sizeof(int32_t)) + buf[0] = endian.htobe32(days) + return sizeof(int32_t) + + +cdef class _BaseTimeDumper(CDumper): + + cpdef get_key(self, obj, format): + # Use (cls,) to report the need to upgrade to a dumper for timetz (the + # Frankenstein of the data types). + if not obj.tzinfo: + return self.cls + else: + return (self.cls,) + + cpdef upgrade(self, obj: time, format): + raise NotImplementedError + + +cdef class _BaseTimeTextDumper(_BaseTimeDumper): + + format = PQ_TEXT + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size; + cdef const char *src + + cdef str s = str(obj) + src = PyUnicode_AsUTF8AndSize(s, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +@cython.final +cdef class TimeDumper(_BaseTimeTextDumper): + + oid = oids.TIME_OID + + cpdef upgrade(self, obj, format): + if not obj.tzinfo: + return self + else: + return TimeTzDumper(self.cls) + + +@cython.final +cdef class TimeTzDumper(_BaseTimeTextDumper): + + oid = oids.TIMETZ_OID + + +@cython.final +cdef class TimeBinaryDumper(_BaseTimeDumper): + + format = PQ_BINARY + oid = oids.TIME_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int64_t micros = cdt.time_microsecond(obj) + 1000000 * ( + cdt.time_second(obj) + + 60 * (cdt.time_minute(obj) + 60 * <int64_t>cdt.time_hour(obj)) + ) + + cdef int64_t *buf = <int64_t *>CDumper.ensure_size( + rv, offset, sizeof(int64_t)) + buf[0] = endian.htobe64(micros) + return sizeof(int64_t) + + cpdef upgrade(self, obj, format): + if not obj.tzinfo: + return self + else: + return TimeTzBinaryDumper(self.cls) + + +@cython.final +cdef class TimeTzBinaryDumper(_BaseTimeDumper): + + format = PQ_BINARY + oid = oids.TIMETZ_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int64_t micros = cdt.time_microsecond(obj) + 1_000_000 * ( + cdt.time_second(obj) + + 60 * (cdt.time_minute(obj) + 60 * <int64_t>cdt.time_hour(obj)) + ) + + off = PyObject_CallFunctionObjArgs(time_utcoffset, <PyObject *>obj, NULL) + cdef int32_t offsec = int(PyObject_CallFunctionObjArgs( + timedelta_total_seconds, <PyObject *>off, NULL)) + + cdef char *buf = CDumper.ensure_size( + rv, offset, sizeof(int64_t) + sizeof(int32_t)) + (<int64_t *>buf)[0] = endian.htobe64(micros) + (<int32_t *>(buf + sizeof(int64_t)))[0] = endian.htobe32(-offsec) + + return sizeof(int64_t) + sizeof(int32_t) + + +cdef class _BaseDatetimeDumper(CDumper): + + cpdef get_key(self, obj, format): + # Use (cls,) to report the need to upgrade (downgrade, actually) to a + # dumper for naive timestamp. 
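+        # For example (illustrative): a naive datetime.now() dumped with
+        # DatetimeDumper returns the key (cls,), which makes the adaptation
+        # machinery call upgrade() and switch to DatetimeNoTzDumper.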
+ if obj.tzinfo: + return self.cls + else: + return (self.cls,) + + cpdef upgrade(self, obj: time, format): + raise NotImplementedError + + +cdef class _BaseDatetimeTextDumper(_BaseDatetimeDumper): + + format = PQ_TEXT + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size; + cdef const char *src + + # NOTE: whatever the PostgreSQL DateStyle input format (DMY, MDY, YMD) + # the YYYY-MM-DD is always understood correctly. + cdef str s = str(obj) + src = PyUnicode_AsUTF8AndSize(s, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +@cython.final +cdef class DatetimeDumper(_BaseDatetimeTextDumper): + + oid = oids.TIMESTAMPTZ_OID + + cpdef upgrade(self, obj, format): + if obj.tzinfo: + return self + else: + return DatetimeNoTzDumper(self.cls) + + +@cython.final +cdef class DatetimeNoTzDumper(_BaseDatetimeTextDumper): + + oid = oids.TIMESTAMP_OID + + +@cython.final +cdef class DatetimeBinaryDumper(_BaseDatetimeDumper): + + format = PQ_BINARY + oid = oids.TIMESTAMPTZ_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + delta = obj - pg_datetimetz_epoch + + cdef int64_t micros = cdt.timedelta_microseconds(delta) + 1_000_000 * ( + 86_400 * <int64_t>cdt.timedelta_days(delta) + + <int64_t>cdt.timedelta_seconds(delta)) + + cdef char *buf = CDumper.ensure_size(rv, offset, sizeof(int64_t)) + (<int64_t *>buf)[0] = endian.htobe64(micros) + return sizeof(int64_t) + + cpdef upgrade(self, obj, format): + if obj.tzinfo: + return self + else: + return DatetimeNoTzBinaryDumper(self.cls) + + +@cython.final +cdef class DatetimeNoTzBinaryDumper(_BaseDatetimeDumper): + + format = PQ_BINARY + oid = oids.TIMESTAMP_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + delta = obj - pg_datetime_epoch + + cdef int64_t micros = cdt.timedelta_microseconds(delta) + 1_000_000 * ( + 86_400 * <int64_t>cdt.timedelta_days(delta) + + <int64_t>cdt.timedelta_seconds(delta)) + + cdef char *buf = CDumper.ensure_size(rv, offset, sizeof(int64_t)) + (<int64_t *>buf)[0] = endian.htobe64(micros) + return sizeof(int64_t) + + +@cython.final +cdef class TimedeltaDumper(CDumper): + + format = PQ_TEXT + oid = oids.INTERVAL_OID + cdef int _style + + def __cinit__(self, cls, context: Optional[AdaptContext] = None): + + cdef const char *ds = _get_intervalstyle(self._pgconn) + if ds[0] == b's': # sql_standard + self._style = INTERVALSTYLE_SQL_STANDARD + else: # iso_8601, postgres, postgres_verbose + self._style = INTERVALSTYLE_OTHERS + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size; + cdef const char *src + + cdef str s + if self._style == INTERVALSTYLE_OTHERS: + # The comma is parsed ok by PostgreSQL but it's not documented + # and it seems brittle to rely on it. CRDB doesn't consume it well. 
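+            # For example (illustrative): str(timedelta(days=1, seconds=1))
+            # is "1 day, 0:00:01", which is dumped as "1 day 0:00:01".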
+ s = str(obj).replace(",", "") + else: + # sql_standard format needs explicit signs + # otherwise -1 day 1 sec will mean -1 sec + s = "%+d day %+d second %+d microsecond" % ( + obj.days, obj.seconds, obj.microseconds) + + src = PyUnicode_AsUTF8AndSize(s, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +@cython.final +cdef class TimedeltaBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.INTERVAL_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int64_t micros = ( + 1_000_000 * <int64_t>cdt.timedelta_seconds(obj) + + cdt.timedelta_microseconds(obj)) + cdef int32_t days = cdt.timedelta_days(obj) + + cdef char *buf = CDumper.ensure_size( + rv, offset, sizeof(int64_t) + sizeof(int32_t) + sizeof(int32_t)) + (<int64_t *>buf)[0] = endian.htobe64(micros) + (<int32_t *>(buf + sizeof(int64_t)))[0] = endian.htobe32(days) + (<int32_t *>(buf + sizeof(int64_t) + sizeof(int32_t)))[0] = 0 + + return sizeof(int64_t) + sizeof(int32_t) + sizeof(int32_t) + + +@cython.final +cdef class DateLoader(CLoader): + + format = PQ_TEXT + cdef int _order + + def __cinit__(self, oid: int, context: Optional[AdaptContext] = None): + + cdef const char *ds = _get_datestyle(self._pgconn) + if ds[0] == b'I': # ISO + self._order = ORDER_YMD + elif ds[0] == b'G': # German + self._order = ORDER_DMY + elif ds[0] == b'S': # SQL, DMY / MDY + self._order = ORDER_DMY if ds[5] == b'D' else ORDER_MDY + elif ds[0] == b'P': # Postgres, DMY / MDY + self._order = ORDER_DMY if ds[10] == b'D' else ORDER_MDY + else: + raise e.InterfaceError(f"unexpected DateStyle: {ds.decode('ascii')}") + + cdef object _error_date(self, const char *data, str msg): + s = bytes(data).decode("utf8", "replace") + if s == "infinity" or len(s.split()[0]) > 10: + raise e.DataError(f"date too large (after year 10K): {s!r}") from None + elif s == "-infinity" or "BC" in s: + raise e.DataError(f"date too small (before year 1): {s!r}") from None + else: + raise e.DataError(f"can't parse date {s!r}: {msg}") from None + + cdef object cload(self, const char *data, size_t length): + if length != 10: + self._error_date(data, "unexpected length") + + cdef int vals[3] + memset(vals, 0, sizeof(vals)) + + cdef const char *ptr + cdef const char *end = data + length + ptr = _parse_date_values(data, end, vals, ARRAYSIZE(vals)) + if ptr == NULL: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse date {s!r}") + + try: + if self._order == ORDER_YMD: + return cdt.date_new(vals[0], vals[1], vals[2]) + elif self._order == ORDER_DMY: + return cdt.date_new(vals[2], vals[1], vals[0]) + else: + return cdt.date_new(vals[2], vals[0], vals[1]) + except ValueError as ex: + self._error_date(data, str(ex)) + + +@cython.final +cdef class DateBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef int days = endian.be32toh((<uint32_t *>data)[0]) + cdef object pydays = days + PG_DATE_EPOCH_DAYS + try: + return PyObject_CallFunctionObjArgs( + date_fromordinal, <PyObject *>pydays, NULL) + except ValueError: + if days < PY_DATE_MIN_DAYS: + raise e.DataError("date too small (before year 1)") from None + else: + raise e.DataError("date too large (after year 10K)") from None + + +@cython.final +cdef class TimeLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + + cdef int vals[3] + memset(vals, 0, sizeof(vals)) + cdef const char *ptr + cdef const char *end = data + 
length + + # Parse the first 3 groups of digits + ptr = _parse_date_values(data, end, vals, ARRAYSIZE(vals)) + if ptr == NULL: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse time {s!r}") + + # Parse the microseconds + cdef int us = 0 + if ptr[0] == b".": + ptr = _parse_micros(ptr + 1, &us) + + try: + return cdt.time_new(vals[0], vals[1], vals[2], us, None) + except ValueError as ex: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse time {s!r}: {ex}") from None + + +@cython.final +cdef class TimeBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef int64_t val = endian.be64toh((<uint64_t *>data)[0]) + cdef int h, m, s, us + + with cython.cdivision(True): + us = val % 1_000_000 + val //= 1_000_000 + + s = val % 60 + val //= 60 + + m = val % 60 + h = <int>(val // 60) + + try: + return cdt.time_new(h, m, s, us, None) + except ValueError: + raise e.DataError( + f"time not supported by Python: hour={h}" + ) from None + + +@cython.final +cdef class TimetzLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + + cdef int vals[3] + memset(vals, 0, sizeof(vals)) + cdef const char *ptr + cdef const char *end = data + length + + # Parse the first 3 groups of digits (time) + ptr = _parse_date_values(data, end, vals, ARRAYSIZE(vals)) + if ptr == NULL: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse timetz {s!r}") + + # Parse the microseconds + cdef int us = 0 + if ptr[0] == b".": + ptr = _parse_micros(ptr + 1, &us) + + # Parse the timezone + cdef int offsecs = _parse_timezone_to_seconds(&ptr, end) + if ptr == NULL: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse timetz {s!r}") + + tz = _timezone_from_seconds(offsecs) + try: + return cdt.time_new(vals[0], vals[1], vals[2], us, tz) + except ValueError as ex: + s = bytes(data).decode("utf8", "replace") + raise e.DataError(f"can't parse timetz {s!r}: {ex}") from None + + +@cython.final +cdef class TimetzBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef int64_t val = endian.be64toh((<uint64_t *>data)[0]) + cdef int32_t off = endian.be32toh((<uint32_t *>(data + sizeof(int64_t)))[0]) + cdef int h, m, s, us + + with cython.cdivision(True): + us = val % 1_000_000 + val //= 1_000_000 + + s = val % 60 + val //= 60 + + m = val % 60 + h = <int>(val // 60) + + tz = _timezone_from_seconds(-off) + try: + return cdt.time_new(h, m, s, us, tz) + except ValueError: + raise e.DataError( + f"time not supported by Python: hour={h}" + ) from None + + +@cython.final +cdef class TimestampLoader(CLoader): + + format = PQ_TEXT + cdef int _order + + def __cinit__(self, oid: int, context: Optional[AdaptContext] = None): + + cdef const char *ds = _get_datestyle(self._pgconn) + if ds[0] == b'I': # ISO + self._order = ORDER_YMD + elif ds[0] == b'G': # German + self._order = ORDER_DMY + elif ds[0] == b'S': # SQL, DMY / MDY + self._order = ORDER_DMY if ds[5] == b'D' else ORDER_MDY + elif ds[0] == b'P': # Postgres, DMY / MDY + self._order = ORDER_PGDM if ds[10] == b'D' else ORDER_PGMD + else: + raise e.InterfaceError(f"unexpected DateStyle: {ds.decode('ascii')}") + + cdef object cload(self, const char *data, size_t length): + cdef const char *end = data + length + if end[-1] == b'C': # ends with BC + raise _get_timestamp_load_error(self._pgconn, data) from None + + if self._order == ORDER_PGDM or 
self._order == ORDER_PGMD: + return self._cload_pg(data, end) + + cdef int vals[6] + memset(vals, 0, sizeof(vals)) + cdef const char *ptr + + # Parse the first 6 groups of digits (date and time) + ptr = _parse_date_values(data, end, vals, ARRAYSIZE(vals)) + if ptr == NULL: + raise _get_timestamp_load_error(self._pgconn, data) from None + + # Parse the microseconds + cdef int us = 0 + if ptr[0] == b".": + ptr = _parse_micros(ptr + 1, &us) + + # Resolve the YMD order + cdef int y, m, d + if self._order == ORDER_YMD: + y, m, d = vals[0], vals[1], vals[2] + elif self._order == ORDER_DMY: + d, m, y = vals[0], vals[1], vals[2] + else: # self._order == ORDER_MDY + m, d, y = vals[0], vals[1], vals[2] + + try: + return cdt.datetime_new( + y, m, d, vals[3], vals[4], vals[5], us, None) + except ValueError as ex: + raise _get_timestamp_load_error(self._pgconn, data, ex) from None + + cdef object _cload_pg(self, const char *data, const char *end): + cdef int vals[4] + memset(vals, 0, sizeof(vals)) + cdef const char *ptr + + # Find Wed Jun 02 or Wed 02 Jun + cdef char *seps[3] + seps[0] = strchr(data, b' ') + seps[1] = strchr(seps[0] + 1, b' ') if seps[0] != NULL else NULL + seps[2] = strchr(seps[1] + 1, b' ') if seps[1] != NULL else NULL + if seps[2] == NULL: + raise _get_timestamp_load_error(self._pgconn, data) from None + + # Parse the following 3 groups of digits (time) + ptr = _parse_date_values(seps[2] + 1, end, vals, 3) + if ptr == NULL: + raise _get_timestamp_load_error(self._pgconn, data) from None + + # Parse the microseconds + cdef int us = 0 + if ptr[0] == b".": + ptr = _parse_micros(ptr + 1, &us) + + # Parse the year + ptr = _parse_date_values(ptr + 1, end, vals + 3, 1) + if ptr == NULL: + raise _get_timestamp_load_error(self._pgconn, data) from None + + # Resolve the MD order + cdef int m, d + try: + if self._order == ORDER_PGDM: + d = int(seps[0][1 : seps[1] - seps[0]]) + m = _month_abbr[seps[1][1 : seps[2] - seps[1]]] + else: # self._order == ORDER_PGMD + m = _month_abbr[seps[0][1 : seps[1] - seps[0]]] + d = int(seps[1][1 : seps[2] - seps[1]]) + except (KeyError, ValueError) as ex: + raise _get_timestamp_load_error(self._pgconn, data, ex) from None + + try: + return cdt.datetime_new( + vals[3], m, d, vals[0], vals[1], vals[2], us, None) + except ValueError as ex: + raise _get_timestamp_load_error(self._pgconn, data, ex) from None + + +@cython.final +cdef class TimestampBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef int64_t val = endian.be64toh((<uint64_t *>data)[0]) + cdef int64_t micros, secs, days + + # Work only with positive values as the cdivision behaves differently + # with negative values, and cdivision=False adds overhead. 
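+        # For example (illustrative): C integer division truncates towards
+        # zero (-7 / 2 == -3) while Python floors it (-7 // 2 == -4), so a
+        # negative value would split into a wrong (secs, micros) pair.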
+        cdef int64_t aval = val if val >= 0 else -val
+
+        # Group the micros in bigger units or timedelta_new might overflow
+        with cython.cdivision(True):
+            secs = aval // 1_000_000
+            micros = aval % 1_000_000
+
+            days = secs // 86_400
+            secs %= 86_400
+
+        try:
+            delta = cdt.timedelta_new(<int>days, <int>secs, <int>micros)
+            if val > 0:
+                return pg_datetime_epoch + delta
+            else:
+                return pg_datetime_epoch - delta
+
+        except OverflowError:
+            if val <= 0:
+                raise e.DataError("timestamp too small (before year 1)") from None
+            else:
+                raise e.DataError("timestamp too large (after year 10K)") from None
+
+
+cdef class _BaseTimestamptzLoader(CLoader):
+    cdef object _time_zone
+
+    def __cinit__(self, oid: int, context: Optional[AdaptContext] = None):
+        self._time_zone = _timezone_from_connection(self._pgconn)
+
+
+@cython.final
+cdef class TimestamptzLoader(_BaseTimestamptzLoader):
+
+    format = PQ_TEXT
+    cdef int _order
+
+    def __cinit__(self, oid: int, context: Optional[AdaptContext] = None):
+
+        cdef const char *ds = _get_datestyle(self._pgconn)
+        if ds[0] == b'I':  # ISO
+            self._order = ORDER_YMD
+        else:  # Not true, but any non-YMD will do.
+            self._order = ORDER_DMY
+
+    cdef object cload(self, const char *data, size_t length):
+        if self._order != ORDER_YMD:
+            return self._cload_notimpl(data, length)
+
+        cdef const char *end = data + length
+        if end[-1] == b'C':  # ends with BC
+            raise _get_timestamp_load_error(self._pgconn, data) from None
+
+        cdef int vals[6]
+        memset(vals, 0, sizeof(vals))
+
+        # Parse the first 6 groups of digits (date and time)
+        cdef const char *ptr
+        ptr = _parse_date_values(data, end, vals, ARRAYSIZE(vals))
+        if ptr == NULL:
+            raise _get_timestamp_load_error(self._pgconn, data) from None
+
+        # Parse the microseconds
+        cdef int us = 0
+        if ptr[0] == b".":
+            ptr = _parse_micros(ptr + 1, &us)
+
+        # Resolve the YMD order
+        cdef int y, m, d
+        if self._order == ORDER_YMD:
+            y, m, d = vals[0], vals[1], vals[2]
+        elif self._order == ORDER_DMY:
+            d, m, y = vals[0], vals[1], vals[2]
+        else:  # self._order == ORDER_MDY
+            m, d, y = vals[0], vals[1], vals[2]
+
+        # Parse the timezone
+        cdef int offsecs = _parse_timezone_to_seconds(&ptr, end)
+        if ptr == NULL:
+            raise _get_timestamp_load_error(self._pgconn, data) from None
+
+        tzoff = cdt.timedelta_new(0, offsecs, 0)
+
+        # The return value is a datetime with the timezone of the connection
+        # (in order to be consistent with the binary loader, which is the only
+        # thing it can return). So create a temporary datetime object, in utc,
+        # shift it by the offset parsed from the timestamp, and then move it to
+        # the connection timezone.
+        dt = None
+        try:
+            dt = cdt.datetime_new(
+                y, m, d, vals[3], vals[4], vals[5], us, timezone_utc)
+            dt -= tzoff
+            return PyObject_CallFunctionObjArgs(datetime_astimezone,
+                <PyObject *>dt, <PyObject *>self._time_zone, NULL)
+        except OverflowError as ex:
+            # If we have created the temporary 'dt' it means that we have a
+            # datetime close to max, the shift pushed it past max, overflowing.
+            # In this case return the datetime in a fixed offset timezone.
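+            # For example (illustrative): "9999-12-31 23:59:59-05" creates
+            # the temporary UTC datetime fine, but shifting it by +5 hours
+            # overflows datetime.max; attaching timezone(tzoff) to the
+            # unshifted value still denotes the same instant.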
+            if dt is not None:
+                return dt.replace(tzinfo=timezone(tzoff))
+            else:
+                ex1 = ex
+        except ValueError as ex:
+            ex1 = ex
+
+        raise _get_timestamp_load_error(self._pgconn, data, ex1) from None
+
+    cdef object _cload_notimpl(self, const char *data, size_t length):
+        s = bytes(data)[:length].decode("utf8", "replace")
+        ds = _get_datestyle(self._pgconn).decode()
+        raise NotImplementedError(
+            f"can't parse timestamptz with DateStyle {ds!r}: {s!r}"
+        )
+
+
+@cython.final
+cdef class TimestamptzBinaryLoader(_BaseTimestamptzLoader):
+
+    format = PQ_BINARY
+
+    cdef object cload(self, const char *data, size_t length):
+        cdef int64_t val = endian.be64toh((<uint64_t *>data)[0])
+        cdef int64_t micros, secs, days
+
+        # Work only with positive values as the cdivision behaves differently
+        # with negative values, and cdivision=False adds overhead.
+        cdef int64_t aval = val if val >= 0 else -val
+
+        # Group the micros in bigger units or timedelta_new might overflow
+        with cython.cdivision(True):
+            secs = aval // 1_000_000
+            micros = aval % 1_000_000
+
+            days = secs // 86_400
+            secs %= 86_400
+
+        try:
+            delta = cdt.timedelta_new(<int>days, <int>secs, <int>micros)
+            if val > 0:
+                dt = pg_datetimetz_epoch + delta
+            else:
+                dt = pg_datetimetz_epoch - delta
+            return PyObject_CallFunctionObjArgs(datetime_astimezone,
+                <PyObject *>dt, <PyObject *>self._time_zone, NULL)
+
+        except OverflowError:
+            # If we were asked about a timestamp which would overflow in UTC,
+            # but not in the desired timezone (e.g. datetime.max at Chicago
+            # timezone) we can still save the day by shifting the value by the
+            # timezone offset and then replacing the timezone.
+            if self._time_zone is not None:
+                utcoff = self._time_zone.utcoffset(
+                    datetime.min if val < 0 else datetime.max
+                )
+                if utcoff:
+                    usoff = 1_000_000 * int(utcoff.total_seconds())
+                    try:
+                        ts = pg_datetime_epoch + timedelta(
+                            microseconds=val + usoff
+                        )
+                    except OverflowError:
+                        pass  # will raise downstream
+                    else:
+                        return ts.replace(tzinfo=self._time_zone)
+
+            if val <= 0:
+                raise e.DataError(
+                    "timestamp too small (before year 1)"
+                ) from None
+            else:
+                raise e.DataError(
+                    "timestamp too large (after year 10K)"
+                ) from None
+
+
+@cython.final
+cdef class IntervalLoader(CLoader):
+
+    format = PQ_TEXT
+    cdef int _style
+
+    def __cinit__(self, oid: int, context: Optional[AdaptContext] = None):
+
+        cdef const char *ds = _get_intervalstyle(self._pgconn)
+        if ds[0] == b'p' and ds[8] == 0:  # postgres
+            self._style = INTERVALSTYLE_POSTGRES
+        else:  # iso_8601, sql_standard, postgres_verbose
+            self._style = INTERVALSTYLE_OTHERS
+
+    cdef object cload(self, const char *data, size_t length):
+        if self._style == INTERVALSTYLE_OTHERS:
+            return self._cload_notimpl(data, length)
+
+        cdef int days = 0, secs = 0, us = 0
+        cdef char sign
+        cdef int val
+        cdef const char *ptr = data
+        cdef const char *sep
+        cdef const char *end = ptr + length
+
+        # If there are spaces, there is a [+|-]n [days|months|years]
+        while True:
+            if ptr[0] == b'-' or ptr[0] == b'+':
+                sign = ptr[0]
+                ptr += 1
+            else:
+                sign = 0
+
+            sep = strchr(ptr, b' ')
+            if sep == NULL or sep > end:
+                break
+
+            val = 0
+            ptr = _parse_date_values(ptr, end, &val, 1)
+            if ptr == NULL:
+                s = bytes(data).decode("utf8", "replace")
+                raise e.DataError(f"can't parse interval {s!r}")
+
+            if sign == b'-':
+                val = -val
+
+            if ptr[1] == b'y':
+                days = 365 * val
+            elif ptr[1] == b'm':
+                days = 30 * val
+            elif ptr[1] == b'd':
+                days = val
+            else:
+                s = bytes(data).decode("utf8", "replace")
+                raise e.DataError(f"can't parse interval {s!r}")
+
+            # Skip the date part word.
+            ptr = strchr(ptr + 1, b' ')
+            if ptr != NULL and ptr < end:
+                ptr += 1
+            else:
+                break
+
+        # Parse the time part. An eventual sign was already consumed in the loop
+        cdef int vals[3]
+        memset(vals, 0, sizeof(vals))
+        if ptr != NULL:
+            ptr = _parse_date_values(ptr, end, vals, ARRAYSIZE(vals))
+            if ptr == NULL:
+                s = bytes(data).decode("utf8", "replace")
+                raise e.DataError(f"can't parse interval {s!r}")
+
+            secs = vals[2] + 60 * (vals[1] + 60 * vals[0])
+
+            if ptr[0] == b'.':
+                ptr = _parse_micros(ptr + 1, &us)
+
+            if sign == b'-':
+                secs = -secs
+                us = -us
+
+        try:
+            return cdt.timedelta_new(days, secs, us)
+        except OverflowError as ex:
+            s = bytes(data).decode("utf8", "replace")
+            raise e.DataError(f"can't parse interval {s!r}: {ex}") from None
+
+    cdef object _cload_notimpl(self, const char *data, size_t length):
+        s = bytes(data).decode("utf8", "replace")
+        style = _get_intervalstyle(self._pgconn).decode()
+        raise NotImplementedError(
+            f"can't parse interval with IntervalStyle {style!r}: {s!r}"
+        )
+
+
+@cython.final
+cdef class IntervalBinaryLoader(CLoader):
+
+    format = PQ_BINARY
+
+    cdef object cload(self, const char *data, size_t length):
+        cdef int64_t val = endian.be64toh((<uint64_t *>data)[0])
+        cdef int32_t days = endian.be32toh(
+            (<uint32_t *>(data + sizeof(int64_t)))[0])
+        cdef int32_t months = endian.be32toh(
+            (<uint32_t *>(data + sizeof(int64_t) + sizeof(int32_t)))[0])
+
+        cdef int years
+        with cython.cdivision(True):
+            if months > 0:
+                years = months // 12
+                months %= 12
+                days += 30 * months + 365 * years
+            elif months < 0:
+                months = -months
+                years = months // 12
+                months %= 12
+                days -= 30 * months + 365 * years
+
+        # Work only with positive values as the cdivision behaves differently
+        # with negative values, and cdivision=False adds overhead.
+        cdef int64_t aval = val if val >= 0 else -val
+        cdef int us, ussecs, usdays
+
+        # Group the micros in bigger units or timedelta_new might overflow
+        with cython.cdivision(True):
+            ussecs = <int>(aval // 1_000_000)
+            us = aval % 1_000_000
+
+            usdays = ussecs // 86_400
+            ussecs %= 86_400
+
+        if val < 0:
+            ussecs = -ussecs
+            usdays = -usdays
+            us = -us
+
+        try:
+            return cdt.timedelta_new(days + usdays, ussecs, us)
+        except OverflowError as ex:
+            raise e.DataError(f"can't parse interval: {ex}")
+
+
+cdef const char *_parse_date_values(
+    const char *ptr, const char *end, int *vals, int nvals
+):
+    """
+    Parse *nvals* numeric values separated by non-numeric chars.
+
+    Write the result in the *vals* array (assumed zeroed), parsing the
+    digits from *ptr* on.
+
+    Return the pointer at the separator after the final digit.
+    """
+    cdef int ival = 0
+    while ptr < end:
+        if b'0' <= ptr[0] <= b'9':
+            vals[ival] = vals[ival] * 10 + (ptr[0] - <char>b'0')
+        else:
+            ival += 1
+            if ival >= nvals:
+                break
+
+        ptr += 1
+
+    return ptr
+
+
+cdef const char *_parse_micros(const char *start, int *us):
+    """
+    Parse microseconds from a string.
+
+    Micros are assumed to be at most 6 digit chars, terminated by a non-digit.
+
+    Return the pointer at the separator after the final digit.
+    """
+    cdef const char *ptr = start
+    while ptr[0]:
+        if b'0' <= ptr[0] <= b'9':
+            us[0] = us[0] * 10 + (ptr[0] - <char>b'0')
+        else:
+            break
+
+        ptr += 1
+
+    # Pad the fraction of second to get micros
+    if us[0] and ptr - start < 6:
+        us[0] *= _uspad[ptr - start]
+
+    return ptr
+
+
+cdef int _parse_timezone_to_seconds(const char **bufptr, const char *end):
+    """
+    Parse a timezone from a string, return the offset in seconds.
+
+    Modify the buffer pointer to point at the first character after the
+    timezone parsed. In case of parse error make it NULL.
+    """
+    cdef const char *ptr = bufptr[0]
+    cdef char sgn = ptr[0]
+
+    # Parse at most three groups of digits
+    cdef int vals[3]
+    memset(vals, 0, sizeof(vals))
+
+    ptr = _parse_date_values(ptr + 1, end, vals, ARRAYSIZE(vals))
+    if ptr == NULL:
+        return 0
+
+    cdef int off = 60 * (60 * vals[0] + vals[1]) + vals[2]
+    return -off if sgn == b"-" else off
+
+
+cdef object _timezone_from_seconds(int sec, __cache={}):
+    cdef object pysec = sec
+    cdef PyObject *ptr = PyDict_GetItem(__cache, pysec)
+    if ptr != NULL:
+        return <object>ptr
+
+    delta = cdt.timedelta_new(0, sec, 0)
+    tz = timezone(delta)
+    __cache[pysec] = tz
+    return tz
+
+
+cdef object _get_timestamp_load_error(
+    pq.PGconn pgconn, const char *data, ex: Optional[Exception] = None
+):
+    s = bytes(data).decode("utf8", "replace")
+
+    def is_overflow(s):
+        if not s:
+            return False
+
+        ds = _get_datestyle(pgconn)
+        if not ds.startswith(b"P"):  # not Postgres style
+            return len(s.split()[0]) > 10  # date is first token
+        else:  # Postgres style
+            return len(s.split()[-1]) > 4  # year is last token
+
+    if s == "-infinity" or s.endswith("BC"):
+        return e.DataError(f"timestamp too small (before year 1): {s!r}")
+    elif s == "infinity" or is_overflow(s):
+        return e.DataError(f"timestamp too large (after year 10K): {s!r}")
+    else:
+        return e.DataError(f"can't parse timestamp {s!r}: {ex or '(unknown)'}")
+
+
+cdef _timezones = {}
+_timezones[None] = timezone_utc
+_timezones[b"UTC"] = timezone_utc
+
+
+cdef object _timezone_from_connection(pq.PGconn pgconn):
+    """Return the Python timezone info of the connection's timezone."""
+    if pgconn is None:
+        return timezone_utc
+
+    cdef bytes tzname = libpq.PQparameterStatus(pgconn._pgconn_ptr, b"TimeZone")
+    cdef PyObject *ptr = PyDict_GetItem(_timezones, tzname)
+    if ptr != NULL:
+        return <object>ptr
+
+    sname = tzname.decode() if tzname else "UTC"
+    try:
+        zi = ZoneInfo(sname)
+    except (KeyError, OSError):
+        logger.warning(
+            "unknown PostgreSQL timezone: %r; will use UTC", sname
+        )
+        zi = timezone_utc
+    except Exception as ex:
+        logger.warning(
+            "error handling PostgreSQL timezone: %r; will use UTC (%s - %s)",
+            sname,
+            type(ex).__name__,
+            ex,
+        )
+        zi = timezone.utc
+
+    _timezones[tzname] = zi
+    return zi
+
+
+cdef const char *_get_datestyle(pq.PGconn pgconn):
+    cdef const char *ds
+    if pgconn is not None:
+        ds = libpq.PQparameterStatus(pgconn._pgconn_ptr, b"DateStyle")
+        if ds is not NULL and ds[0]:
+            return ds
+
+    return b"ISO, DMY"
+
+
+cdef const char *_get_intervalstyle(pq.PGconn pgconn):
+    cdef const char *ds
+    if pgconn is not None:
+        ds = libpq.PQparameterStatus(pgconn._pgconn_ptr, b"IntervalStyle")
+        if ds is not NULL and ds[0]:
+            return ds
+
+    return b"postgres"
diff --git a/psycopg_c/psycopg_c/types/numeric.pyx b/psycopg_c/psycopg_c/types/numeric.pyx
new file mode 100644
index 0000000..893bdc2
--- /dev/null
+++ b/psycopg_c/psycopg_c/types/numeric.pyx
@@ -0,0 +1,715 @@
+"""
+Cython adapters for numeric types.
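+
+Covered: int (dispatched among int2/int4/int8/numeric as appropriate),
+float and Decimal, in text and binary format.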
+""" + +# Copyright (C) 2020 The Psycopg Team + +cimport cython + +from libc.stdint cimport * +from libc.string cimport memcpy, strlen +from cpython.mem cimport PyMem_Free +from cpython.dict cimport PyDict_GetItem, PyDict_SetItem +from cpython.long cimport ( + PyLong_FromString, PyLong_FromLong, PyLong_FromLongLong, + PyLong_FromUnsignedLong, PyLong_AsLongLong) +from cpython.bytes cimport PyBytes_AsStringAndSize +from cpython.float cimport PyFloat_FromDouble, PyFloat_AsDouble +from cpython.unicode cimport PyUnicode_DecodeUTF8 + +from decimal import Decimal, Context, DefaultContext + +from psycopg_c._psycopg cimport endian +from psycopg import errors as e +from psycopg._wrappers import Int2, Int4, Int8, IntNumeric + +cdef extern from "Python.h": + # work around https://github.com/cython/cython/issues/3909 + double PyOS_string_to_double( + const char *s, char **endptr, PyObject *overflow_exception) except? -1.0 + char *PyOS_double_to_string( + double val, char format_code, int precision, int flags, int *ptype + ) except NULL + int Py_DTSF_ADD_DOT_0 + long long PyLong_AsLongLongAndOverflow(object pylong, int *overflow) except? -1 + + # Missing in cpython/unicode.pxd + const char *PyUnicode_AsUTF8(object unicode) except NULL + + +# defined in numutils.c +cdef extern from *: + """ +int pg_lltoa(int64_t value, char *a); +#define MAXINT8LEN 20 + """ + int pg_lltoa(int64_t value, char *a) + const int MAXINT8LEN + + +cdef class _NumberDumper(CDumper): + + format = PQ_TEXT + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + return dump_int_to_text(obj, rv, offset) + + def quote(self, obj) -> bytearray: + cdef Py_ssize_t length + + rv = PyByteArray_FromStringAndSize("", 0) + if obj >= 0: + length = self.cdump(obj, rv, 0) + else: + PyByteArray_Resize(rv, 23) + rv[0] = b' ' + length = 1 + self.cdump(obj, rv, 1) + + PyByteArray_Resize(rv, length) + return rv + + +@cython.final +cdef class Int2Dumper(_NumberDumper): + + oid = oids.INT2_OID + + +@cython.final +cdef class Int4Dumper(_NumberDumper): + + oid = oids.INT4_OID + + +@cython.final +cdef class Int8Dumper(_NumberDumper): + + oid = oids.INT8_OID + + +@cython.final +cdef class IntNumericDumper(_NumberDumper): + + oid = oids.NUMERIC_OID + + +@cython.final +cdef class Int2BinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.INT2_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int16_t *buf = <int16_t *>CDumper.ensure_size( + rv, offset, sizeof(int16_t)) + cdef int16_t val = <int16_t>PyLong_AsLongLong(obj) + # swap bytes if needed + cdef uint16_t *ptvar = <uint16_t *>(&val) + buf[0] = endian.htobe16(ptvar[0]) + return sizeof(int16_t) + + +@cython.final +cdef class Int4BinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.INT4_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int32_t *buf = <int32_t *>CDumper.ensure_size( + rv, offset, sizeof(int32_t)) + cdef int32_t val = <int32_t>PyLong_AsLongLong(obj) + # swap bytes if needed + cdef uint32_t *ptvar = <uint32_t *>(&val) + buf[0] = endian.htobe32(ptvar[0]) + return sizeof(int32_t) + + +@cython.final +cdef class Int8BinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.INT8_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int64_t *buf = <int64_t *>CDumper.ensure_size( + rv, offset, sizeof(int64_t)) + cdef int64_t val = PyLong_AsLongLong(obj) + # swap bytes if needed + cdef uint64_t *ptvar = <uint64_t *>(&val) + buf[0] = 
endian.htobe64(ptvar[0]) + return sizeof(int64_t) + + +cdef extern from *: + """ +/* Ratio between number of bits required to store a number and number of pg + * decimal digits required (log(2) / log(10_000)). + */ +#define BIT_PER_PGDIGIT 0.07525749891599529 + +/* decimal digits per Postgres "digit" */ +#define DEC_DIGITS 4 + +#define NUMERIC_POS 0x0000 +#define NUMERIC_NEG 0x4000 +#define NUMERIC_NAN 0xC000 +#define NUMERIC_PINF 0xD000 +#define NUMERIC_NINF 0xF000 +""" + const double BIT_PER_PGDIGIT + const int DEC_DIGITS + const int NUMERIC_POS + const int NUMERIC_NEG + const int NUMERIC_NAN + const int NUMERIC_PINF + const int NUMERIC_NINF + + +@cython.final +cdef class IntNumericBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + return dump_int_to_numeric_binary(obj, rv, offset) + + +cdef class IntDumper(_NumberDumper): + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + raise TypeError( + f"{type(self).__name__} is a dispatcher to other dumpers:" + " dump() is not supposed to be called" + ) + + cpdef get_key(self, obj, format): + cdef long long val + cdef int overflow + + val = PyLong_AsLongLongAndOverflow(obj, &overflow) + if overflow: + return IntNumeric + + if INT32_MIN <= obj <= INT32_MAX: + if INT16_MIN <= obj <= INT16_MAX: + return Int2 + else: + return Int4 + else: + if INT64_MIN <= obj <= INT64_MAX: + return Int8 + else: + return IntNumeric + + _int2_dumper = Int2Dumper + _int4_dumper = Int4Dumper + _int8_dumper = Int8Dumper + _int_numeric_dumper = IntNumericDumper + + cpdef upgrade(self, obj, format): + cdef long long val + cdef int overflow + + val = PyLong_AsLongLongAndOverflow(obj, &overflow) + if overflow: + return self._int_numeric_dumper(IntNumeric) + + if INT32_MIN <= obj <= INT32_MAX: + if INT16_MIN <= obj <= INT16_MAX: + return self._int2_dumper(Int2) + else: + return self._int4_dumper(Int4) + else: + if INT64_MIN <= obj <= INT64_MAX: + return self._int8_dumper(Int8) + else: + return self._int_numeric_dumper(IntNumeric) + + +@cython.final +cdef class IntBinaryDumper(IntDumper): + + format = PQ_BINARY + + _int2_dumper = Int2BinaryDumper + _int4_dumper = Int4BinaryDumper + _int8_dumper = Int8BinaryDumper + _int_numeric_dumper = IntNumericBinaryDumper + + +@cython.final +cdef class IntLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + # if the number ends with a 0 we don't need a copy + if data[length] == b'\0': + return PyLong_FromString(data, NULL, 10) + + # Otherwise we have to copy it aside + if length > MAXINT8LEN: + raise ValueError("string too big for an int") + + cdef char[MAXINT8LEN + 1] buf + memcpy(buf, data, length) + buf[length] = 0 + return PyLong_FromString(buf, NULL, 10) + + + +@cython.final +cdef class Int2BinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return PyLong_FromLong(<int16_t>endian.be16toh((<uint16_t *>data)[0])) + + +@cython.final +cdef class Int4BinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return PyLong_FromLong(<int32_t>endian.be32toh((<uint32_t *>data)[0])) + + +@cython.final +cdef class Int8BinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return PyLong_FromLongLong(<int64_t>endian.be64toh((<uint64_t *>data)[0])) + + +@cython.final +cdef class OidBinaryLoader(CLoader): + + 
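+    # Note: OIDs are unsigned 32-bit values, hence PyLong_FromUnsignedLong
+    # and no cast to a signed type, unlike the intN loaders above.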
format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return PyLong_FromUnsignedLong(endian.be32toh((<uint32_t *>data)[0])) + + +cdef class _FloatDumper(CDumper): + + format = PQ_TEXT + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef double d = PyFloat_AsDouble(obj) + cdef char *out = PyOS_double_to_string( + d, b'r', 0, Py_DTSF_ADD_DOT_0, NULL) + cdef Py_ssize_t length = strlen(out) + cdef char *tgt = CDumper.ensure_size(rv, offset, length) + memcpy(tgt, out, length) + PyMem_Free(out) + return length + + def quote(self, obj) -> bytes: + value = bytes(self.dump(obj)) + cdef PyObject *ptr = PyDict_GetItem(_special_float, value) + if ptr != NULL: + return <object>ptr + + return value if obj >= 0 else b" " + value + +cdef dict _special_float = { + b"inf": b"'Infinity'::float8", + b"-inf": b"'-Infinity'::float8", + b"nan": b"'NaN'::float8", +} + + +@cython.final +cdef class FloatDumper(_FloatDumper): + + oid = oids.FLOAT8_OID + + +@cython.final +cdef class Float4Dumper(_FloatDumper): + + oid = oids.FLOAT4_OID + + +@cython.final +cdef class FloatBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.FLOAT8_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef double d = PyFloat_AsDouble(obj) + cdef uint64_t *intptr = <uint64_t *>&d + cdef uint64_t *buf = <uint64_t *>CDumper.ensure_size( + rv, offset, sizeof(uint64_t)) + buf[0] = endian.htobe64(intptr[0]) + return sizeof(uint64_t) + + +@cython.final +cdef class Float4BinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.FLOAT4_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef float f = <float>PyFloat_AsDouble(obj) + cdef uint32_t *intptr = <uint32_t *>&f + cdef uint32_t *buf = <uint32_t *>CDumper.ensure_size( + rv, offset, sizeof(uint32_t)) + buf[0] = endian.htobe32(intptr[0]) + return sizeof(uint32_t) + + +@cython.final +cdef class FloatLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + cdef char *endptr + cdef double d = PyOS_string_to_double( + data, &endptr, <PyObject *>OverflowError) + return PyFloat_FromDouble(d) + + +@cython.final +cdef class Float4BinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef uint32_t asint = endian.be32toh((<uint32_t *>data)[0]) + # avoid warning: + # dereferencing type-punned pointer will break strict-aliasing rules + cdef char *swp = <char *>&asint + return PyFloat_FromDouble((<float *>swp)[0]) + + +@cython.final +cdef class Float8BinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + cdef uint64_t asint = endian.be64toh((<uint64_t *>data)[0]) + cdef char *swp = <char *>&asint + return PyFloat_FromDouble((<double *>swp)[0]) + + +@cython.final +cdef class DecimalDumper(CDumper): + + format = PQ_TEXT + oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + return dump_decimal_to_text(obj, rv, offset) + + def quote(self, obj) -> bytes: + value = bytes(self.dump(obj)) + cdef PyObject *ptr = PyDict_GetItem(_special_decimal, value) + if ptr != NULL: + return <object>ptr + + return value if obj >= 0 else b" " + value + +cdef dict _special_decimal = { + b"Infinity": b"'Infinity'::numeric", + b"-Infinity": b"'-Infinity'::numeric", + b"NaN": b"'NaN'::numeric", +} + + +@cython.final +cdef class NumericLoader(CLoader): + + format = PQ_TEXT + + cdef object 
cload(self, const char *data, size_t length): + s = PyUnicode_DecodeUTF8(<char *>data, length, NULL) + return Decimal(s) + + +cdef dict _decimal_special = { + NUMERIC_NAN: Decimal("NaN"), + NUMERIC_PINF: Decimal("Infinity"), + NUMERIC_NINF: Decimal("-Infinity"), +} + +cdef dict _contexts = {} +for _i in range(DefaultContext.prec): + _contexts[_i] = DefaultContext + + +@cython.final +cdef class NumericBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + + cdef uint16_t *data16 = <uint16_t *>data + cdef uint16_t ndigits = endian.be16toh(data16[0]) + cdef int16_t weight = <int16_t>endian.be16toh(data16[1]) + cdef uint16_t sign = endian.be16toh(data16[2]) + cdef uint16_t dscale = endian.be16toh(data16[3]) + cdef int shift + cdef int i + cdef PyObject *pctx + cdef object key + + if sign == NUMERIC_POS or sign == NUMERIC_NEG: + if length != (4 + ndigits) * sizeof(uint16_t): + raise e.DataError("bad ndigits in numeric binary representation") + + val = 0 + for i in range(ndigits): + val *= 10_000 + val += endian.be16toh(data16[i + 4]) + + shift = dscale - (ndigits - weight - 1) * DEC_DIGITS + + key = (weight + 2) * DEC_DIGITS + dscale + pctx = PyDict_GetItem(_contexts, key) + if pctx == NULL: + ctx = Context(prec=key) + PyDict_SetItem(_contexts, key, ctx) + pctx = <PyObject *>ctx + + return ( + Decimal(val if sign == NUMERIC_POS else -val) + .scaleb(-dscale, <object>pctx) + .shift(shift, <object>pctx) + ) + else: + try: + return _decimal_special[sign] + except KeyError: + raise e.DataError(f"bad value for numeric sign: 0x{sign:X}") + + +@cython.final +cdef class DecimalBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + return dump_decimal_to_numeric_binary(obj, rv, offset) + + +@cython.final +cdef class NumericDumper(CDumper): + + format = PQ_TEXT + oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + if isinstance(obj, int): + return dump_int_to_text(obj, rv, offset) + else: + return dump_decimal_to_text(obj, rv, offset) + + +@cython.final +cdef class NumericBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + if isinstance(obj, int): + return dump_int_to_numeric_binary(obj, rv, offset) + else: + return dump_decimal_to_numeric_binary(obj, rv, offset) + + +cdef Py_ssize_t dump_decimal_to_text(obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *src + cdef Py_ssize_t length + cdef char *buf + + b = bytes(str(obj), "utf-8") + PyBytes_AsStringAndSize(b, &src, &length) + + if src[0] != b's': + buf = CDumper.ensure_size(rv, offset, length) + memcpy(buf, src, length) + + else: # convert sNaN to NaN + length = 3 # NaN + buf = CDumper.ensure_size(rv, offset, length) + memcpy(buf, b"NaN", length) + + return length + + +cdef extern from *: + """ +/* Weights of py digits into a pg digit according to their positions. */ +static const int pydigit_weights[] = {1000, 100, 10, 1}; +""" + const int[4] pydigit_weights + + +@cython.cdivision(True) +cdef Py_ssize_t dump_decimal_to_numeric_binary( + obj, bytearray rv, Py_ssize_t offset +) except -1: + + # TODO: this implementation is about 30% slower than the text dump. 
+    # This could probably be optimised by accessing the C structure of
+    # the Decimal object, if available, which would save the creation of
+    # several intermediate Python objects (the DecimalTuple, the digits
+    # tuple, and then accessing them).
+
+    cdef object t = obj.as_tuple()
+    cdef int sign = t[0]
+    cdef tuple digits = t[1]
+    cdef uint16_t *buf
+    cdef Py_ssize_t length
+
+    cdef object pyexp = t[2]
+    cdef const char *bexp
+    if not isinstance(pyexp, int):
+        # Handle inf, nan
+        length = 4 * sizeof(uint16_t)
+        buf = <uint16_t *>CDumper.ensure_size(rv, offset, length)
+        buf[0] = 0
+        buf[1] = 0
+        buf[3] = 0
+        bexp = PyUnicode_AsUTF8(pyexp)
+        if bexp[0] == b'n' or bexp[0] == b'N':
+            buf[2] = endian.htobe16(NUMERIC_NAN)
+        elif bexp[0] == b'F':
+            if sign:
+                buf[2] = endian.htobe16(NUMERIC_NINF)
+            else:
+                buf[2] = endian.htobe16(NUMERIC_PINF)
+        else:
+            raise e.DataError(f"unexpected decimal exponent: {pyexp}")
+        return length
+
+    cdef int exp = pyexp
+    cdef uint16_t ndigits = <uint16_t>len(digits)
+
+    # Find the last nonzero digit
+    cdef int nzdigits = ndigits
+    while nzdigits > 0 and digits[nzdigits - 1] == 0:
+        nzdigits -= 1
+
+    cdef uint16_t dscale
+    if exp <= 0:
+        dscale = -exp
+    else:
+        dscale = 0
+        # align the py digits to the pg digits if there's some py exponent
+        ndigits += exp % DEC_DIGITS
+
+    if nzdigits == 0:
+        length = 4 * sizeof(uint16_t)
+        buf = <uint16_t *>CDumper.ensure_size(rv, offset, length)
+        buf[0] = 0  # ndigits
+        buf[1] = 0  # weight
+        buf[2] = endian.htobe16(NUMERIC_POS)  # sign
+        buf[3] = endian.htobe16(dscale)
+        return length
+
+    # Equivalent of 0-padding left to align the py digits to the pg digits
+    # but without changing the digits tuple.
+    cdef int wi = 0
+    cdef int mod = (ndigits - dscale) % DEC_DIGITS
+    if mod < 0:
+        # the difference between C and Py % operator
+        mod += 4
+    if mod:
+        wi = DEC_DIGITS - mod
+        ndigits += wi
+
+    cdef int tmp = nzdigits + wi
+    cdef int pgdigits = tmp // DEC_DIGITS + (tmp % DEC_DIGITS and 1)
+    length = (pgdigits + 4) * sizeof(uint16_t)
+    buf = <uint16_t*>CDumper.ensure_size(rv, offset, length)
+    buf[0] = endian.htobe16(pgdigits)
+    buf[1] = endian.htobe16(<int16_t>((ndigits + exp) // DEC_DIGITS - 1))
+    buf[2] = endian.htobe16(NUMERIC_NEG) if sign else endian.htobe16(NUMERIC_POS)
+    buf[3] = endian.htobe16(dscale)
+
+    cdef uint16_t pgdigit = 0
+    cdef int bi = 4
+    for i in range(nzdigits):
+        pgdigit += pydigit_weights[wi] * <int>(digits[i])
+        wi += 1
+        if wi >= DEC_DIGITS:
+            buf[bi] = endian.htobe16(pgdigit)
+            pgdigit = wi = 0
+            bi += 1
+
+    if pgdigit:
+        buf[bi] = endian.htobe16(pgdigit)
+
+    return length
+
+
+cdef Py_ssize_t dump_int_to_text(obj, bytearray rv, Py_ssize_t offset) except -1:
+    cdef long long val
+    cdef int overflow
+    cdef char *buf
+    cdef char *src
+    cdef Py_ssize_t length
+
+    # Ensure an int or a subclass. The 'is' type check is fast.
+    # Passing a float must give an error, but passing an Enum should work.
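+    # For example (illustrative): True and IntEnum members pass the check
+    # below (both are int subclasses), while 1.0 raises a DataError.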
+ if type(obj) is not int and not isinstance(obj, int): + raise e.DataError(f"integer expected, got {type(obj).__name__!r}") + + val = PyLong_AsLongLongAndOverflow(obj, &overflow) + if not overflow: + buf = CDumper.ensure_size(rv, offset, MAXINT8LEN + 1) + length = pg_lltoa(val, buf) + else: + b = bytes(str(obj), "utf-8") + PyBytes_AsStringAndSize(b, &src, &length) + buf = CDumper.ensure_size(rv, offset, length) + memcpy(buf, src, length) + + return length + + +cdef Py_ssize_t dump_int_to_numeric_binary(obj, bytearray rv, Py_ssize_t offset) except -1: + # Calculate the number of PG digits required to store the number + cdef uint16_t ndigits + ndigits = <uint16_t>((<int>obj.bit_length()) * BIT_PER_PGDIGIT) + 1 + + cdef uint16_t sign = NUMERIC_POS + if obj < 0: + sign = NUMERIC_NEG + obj = -obj + + cdef Py_ssize_t length = sizeof(uint16_t) * (ndigits + 4) + cdef uint16_t *buf + buf = <uint16_t *><void *>CDumper.ensure_size(rv, offset, length) + buf[0] = endian.htobe16(ndigits) + buf[1] = endian.htobe16(ndigits - 1) # weight + buf[2] = endian.htobe16(sign) + buf[3] = 0 # dscale + + cdef int i = 4 + ndigits - 1 + cdef uint16_t rem + while obj: + rem = obj % 10000 + obj //= 10000 + buf[i] = endian.htobe16(rem) + i -= 1 + while i > 3: + buf[i] = 0 + i -= 1 + + return length diff --git a/psycopg_c/psycopg_c/types/numutils.c b/psycopg_c/psycopg_c/types/numutils.c new file mode 100644 index 0000000..4be7108 --- /dev/null +++ b/psycopg_c/psycopg_c/types/numutils.c @@ -0,0 +1,243 @@ +/* + * Utilities to deal with numbers. + * + * Copyright (C) 2020 The Psycopg Team + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + */ + +#include <stdint.h> +#include <string.h> + +#include "pg_config.h" + + +/* + * 64-bit integers + */ +#ifdef HAVE_LONG_INT_64 +/* Plain "long int" fits, use it */ + +# ifndef HAVE_INT64 +typedef long int int64; +# endif +# ifndef HAVE_UINT64 +typedef unsigned long int uint64; +# endif +# define INT64CONST(x) (x##L) +# define UINT64CONST(x) (x##UL) +#elif defined(HAVE_LONG_LONG_INT_64) +/* We have working support for "long long int", use that */ + +# ifndef HAVE_INT64 +typedef long long int int64; +# endif +# ifndef HAVE_UINT64 +typedef unsigned long long int uint64; +# endif +# define INT64CONST(x) (x##LL) +# define UINT64CONST(x) (x##ULL) +#else +/* neither HAVE_LONG_INT_64 nor HAVE_LONG_LONG_INT_64 */ +# error must have a working 64-bit integer datatype +#endif + + +#ifndef HAVE__BUILTIN_CLZ +static const uint8_t pg_leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; +#endif + +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', + '7', '0', '8', '0', '9', '1', 
'0', '1', '1', '1', '2', '1', '3', '1', '4', + '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', + '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', + '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', + '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5', + '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', + '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', + '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', + '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', + '7', '9', '8', '9', '9' +}; + + +/* + * pg_leftmost_one_pos64 + * As above, but for a 64-bit word. + */ +static inline int +pg_leftmost_one_pos64(uint64_t word) +{ +#ifdef HAVE__BUILTIN_CLZ +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* !HAVE__BUILTIN_CLZ */ + int shift = 64 - 8; + + while ((word >> shift) == 0) + shift -= 8; + + return shift + pg_leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + + +static inline int +decimalLength64(const uint64_t v) +{ + int t; + static const uint64_t PowersOfTen[] = { + UINT64CONST(1), UINT64CONST(10), + UINT64CONST(100), UINT64CONST(1000), + UINT64CONST(10000), UINT64CONST(100000), + UINT64CONST(1000000), UINT64CONST(10000000), + UINT64CONST(100000000), UINT64CONST(1000000000), + UINT64CONST(10000000000), UINT64CONST(100000000000), + UINT64CONST(1000000000000), UINT64CONST(10000000000000), + UINT64CONST(100000000000000), UINT64CONST(1000000000000000), + UINT64CONST(10000000000000000), UINT64CONST(100000000000000000), + UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000) + }; + + /* + * Compute base-10 logarithm by dividing the base-2 logarithm by a + * good-enough approximation of the base-2 logarithm of 10 + */ + t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096; + return t + (v >= PowersOfTen[t]); +} + + +/* + * Get the decimal representation, not NUL-terminated, and return the length of + * same. Caller must ensure that a points to at least MAXINT8LEN bytes. + */ +int +pg_ulltoa_n(uint64_t value, char *a) +{ + int olength, + i = 0; + uint32_t value2; + + /* Degenerate case */ + if (value == 0) + { + *a = '0'; + return 1; + } + + olength = decimalLength64(value); + + /* Compute the result string. 
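+     * Digits are emitted from the right in groups of eight, two decimal
+     * digits per DIGIT_TABLE lookup.  E.g. for value = 123456789 the first
+     * iteration writes "23456789" into a[1..8] and leaves value = 1, which
+     * the 32-bit tail below stores as '0' + 1 at a[0].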
*/ + while (value >= 100000000) + { + const uint64_t q = value / 100000000; + uint32_t value2 = (uint32_t) (value - 100000000 * q); + + const uint32_t c = value2 % 10000; + const uint32_t d = value2 / 10000; + const uint32_t c0 = (c % 100) << 1; + const uint32_t c1 = (c / 100) << 1; + const uint32_t d0 = (d % 100) << 1; + const uint32_t d1 = (d / 100) << 1; + + char *pos = a + olength - i; + + value = q; + + memcpy(pos - 2, DIGIT_TABLE + c0, 2); + memcpy(pos - 4, DIGIT_TABLE + c1, 2); + memcpy(pos - 6, DIGIT_TABLE + d0, 2); + memcpy(pos - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + /* Switch to 32-bit for speed */ + value2 = (uint32_t) value; + + if (value2 >= 10000) + { + const uint32_t c = value2 - 10000 * (value2 / 10000); + const uint32_t c0 = (c % 100) << 1; + const uint32_t c1 = (c / 100) << 1; + + char *pos = a + olength - i; + + value2 /= 10000; + + memcpy(pos - 2, DIGIT_TABLE + c0, 2); + memcpy(pos - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (value2 >= 100) + { + const uint32_t c = (value2 % 100) << 1; + char *pos = a + olength - i; + + value2 /= 100; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value2 >= 10) + { + const uint32_t c = value2 << 1; + char *pos = a + olength - i; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + } + else + *a = (char) ('0' + value2); + + return olength; +} + +/* + * pg_lltoa: converts a signed 64-bit integer to its string representation and + * returns strlen(a). + * + * Caller must ensure that 'a' points to enough memory to hold the result + * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL). + */ +int +pg_lltoa(int64_t value, char *a) +{ + uint64_t uvalue = value; + int len = 0; + + if (value < 0) + { + uvalue = (uint64_t) 0 - uvalue; + a[len++] = '-'; + } + + len += pg_ulltoa_n(uvalue, a + len); + a[len] = '\0'; + return len; +} diff --git a/psycopg_c/psycopg_c/types/string.pyx b/psycopg_c/psycopg_c/types/string.pyx new file mode 100644 index 0000000..da18b01 --- /dev/null +++ b/psycopg_c/psycopg_c/types/string.pyx @@ -0,0 +1,315 @@ +""" +Cython adapters for textual types. 
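+
+These cover str dumpers and loaders (in text and binary format, honouring
+the connection client_encoding) as well as bytes/bytea adaptation.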
+""" + +# Copyright (C) 2020 The Psycopg Team + +cimport cython + +from libc.string cimport memcpy, memchr +from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize +from cpython.unicode cimport ( + PyUnicode_AsEncodedString, + PyUnicode_AsUTF8String, + PyUnicode_CheckExact, + PyUnicode_Decode, + PyUnicode_DecodeUTF8, +) + +from psycopg_c.pq cimport libpq, Escaping, _buffer_as_string_and_size + +from psycopg import errors as e +from psycopg._encodings import pg2pyenc + +cdef extern from "Python.h": + const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) except NULL + + +cdef class _BaseStrDumper(CDumper): + cdef int is_utf8 + cdef char *encoding + cdef bytes _bytes_encoding # needed to keep `encoding` alive + + def __cinit__(self, cls, context: Optional[AdaptContext] = None): + + self.is_utf8 = 0 + self.encoding = "utf-8" + cdef const char *pgenc + + if self._pgconn is not None: + pgenc = libpq.PQparameterStatus(self._pgconn._pgconn_ptr, b"client_encoding") + if pgenc == NULL or pgenc == b"UTF8": + self._bytes_encoding = b"utf-8" + self.is_utf8 = 1 + else: + self._bytes_encoding = pg2pyenc(pgenc).encode() + if self._bytes_encoding == b"ascii": + self.is_utf8 = 1 + self.encoding = PyBytes_AsString(self._bytes_encoding) + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + # the server will raise DataError subclass if the string contains 0x00 + cdef Py_ssize_t size; + cdef const char *src + + if self.is_utf8: + # Probably the fastest path, but doesn't work with subclasses + if PyUnicode_CheckExact(obj): + src = PyUnicode_AsUTF8AndSize(obj, &size) + else: + b = PyUnicode_AsUTF8String(obj) + PyBytes_AsStringAndSize(b, <char **>&src, &size) + else: + b = PyUnicode_AsEncodedString(obj, self.encoding, NULL) + PyBytes_AsStringAndSize(b, <char **>&src, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +cdef class _StrBinaryDumper(_BaseStrDumper): + + format = PQ_BINARY + + +@cython.final +cdef class StrBinaryDumper(_StrBinaryDumper): + + oid = oids.TEXT_OID + + +@cython.final +cdef class StrBinaryDumperVarchar(_StrBinaryDumper): + + oid = oids.VARCHAR_OID + + +@cython.final +cdef class StrBinaryDumperName(_StrBinaryDumper): + + oid = oids.NAME_OID + + +cdef class _StrDumper(_BaseStrDumper): + + format = PQ_TEXT + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size = StrBinaryDumper.cdump(self, obj, rv, offset) + + # Like the binary dump, but check for 0, or the string will be truncated + cdef const char *buf = PyByteArray_AS_STRING(rv) + if NULL != memchr(buf + offset, 0x00, size): + raise e.DataError( + "PostgreSQL text fields cannot contain NUL (0x00) bytes" + ) + return size + + +@cython.final +cdef class StrDumper(_StrDumper): + + oid = oids.TEXT_OID + + +@cython.final +cdef class StrDumperVarchar(_StrDumper): + + oid = oids.VARCHAR_OID + + +@cython.final +cdef class StrDumperName(_StrDumper): + + oid = oids.NAME_OID + + +@cython.final +cdef class StrDumperUnknown(_StrDumper): + pass + + +cdef class _TextLoader(CLoader): + + format = PQ_TEXT + + cdef int is_utf8 + cdef char *encoding + cdef bytes _bytes_encoding # needed to keep `encoding` alive + + def __cinit__(self, oid: int, context: Optional[AdaptContext] = None): + + self.is_utf8 = 0 + self.encoding = "utf-8" + cdef const char *pgenc + + if self._pgconn is not None: + pgenc = libpq.PQparameterStatus(self._pgconn._pgconn_ptr, b"client_encoding") + if pgenc == NULL or pgenc == 
b"UTF8": + self._bytes_encoding = b"utf-8" + self.is_utf8 = 1 + else: + self._bytes_encoding = pg2pyenc(pgenc).encode() + + if pgenc == b"SQL_ASCII": + self.encoding = NULL + else: + self.encoding = PyBytes_AsString(self._bytes_encoding) + + cdef object cload(self, const char *data, size_t length): + if self.is_utf8: + return PyUnicode_DecodeUTF8(<char *>data, length, NULL) + elif self.encoding: + return PyUnicode_Decode(<char *>data, length, self.encoding, NULL) + else: + return data[:length] + +@cython.final +cdef class TextLoader(_TextLoader): + + format = PQ_TEXT + + +@cython.final +cdef class TextBinaryLoader(_TextLoader): + + format = PQ_BINARY + + +@cython.final +cdef class BytesDumper(CDumper): + + format = PQ_TEXT + oid = oids.BYTEA_OID + + # 0: not set, 1: just single "'" quote, 3: " E'" quote + cdef int _qplen + + def __cinit__(self): + self._qplen = 0 + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + + cdef size_t len_out + cdef unsigned char *out + cdef char *ptr + cdef Py_ssize_t length + + _buffer_as_string_and_size(obj, &ptr, &length) + + if self._pgconn is not None and self._pgconn._pgconn_ptr != NULL: + out = libpq.PQescapeByteaConn( + self._pgconn._pgconn_ptr, <unsigned char *>ptr, length, &len_out) + else: + out = libpq.PQescapeBytea(<unsigned char *>ptr, length, &len_out) + + if out is NULL: + raise MemoryError( + f"couldn't allocate for escape_bytea of {length} bytes" + ) + + len_out -= 1 # out includes final 0 + cdef char *buf = CDumper.ensure_size(rv, offset, len_out) + memcpy(buf, out, len_out) + libpq.PQfreemem(out) + return len_out + + def quote(self, obj): + cdef size_t len_out + cdef unsigned char *out + cdef char *ptr + cdef Py_ssize_t length + cdef const char *scs + + escaped = self.dump(obj) + _buffer_as_string_and_size(escaped, &ptr, &length) + + rv = PyByteArray_FromStringAndSize("", 0) + + # We cannot use the base quoting because escape_bytea already returns + # the quotes content. if scs is off it will escape the backslashes in + # the format, otherwise it won't, but it doesn't tell us what quotes to + # use. + if self._pgconn is not None: + if not self._qplen: + scs = libpq.PQparameterStatus(self._pgconn._pgconn_ptr, + b"standard_conforming_strings") + if scs and scs[0] == b'o' and scs[1] == b"n": # == "on" + self._qplen = 1 + else: + self._qplen = 3 + + PyByteArray_Resize(rv, length + self._qplen + 1) # Include quotes + ptr_out = PyByteArray_AS_STRING(rv) + if self._qplen == 1: + ptr_out[0] = b"'" + else: + ptr_out[0] = b" " + ptr_out[1] = b"E" + ptr_out[2] = b"'" + memcpy(ptr_out + self._qplen, ptr, length) + ptr_out[length + self._qplen] = b"'" + return rv + + # We don't have a connection, so someone is using us to generate a file + # to use off-line or something like that. PQescapeBytea, like its + # string counterpart, is not predictable whether it will escape + # backslashes. 
+ PyByteArray_Resize(rv, length + 4) # Include quotes + ptr_out = PyByteArray_AS_STRING(rv) + ptr_out[0] = b" " + ptr_out[1] = b"E" + ptr_out[2] = b"'" + memcpy(ptr_out + 3, ptr, length) + ptr_out[length + 3] = b"'" + + esc = Escaping() + if esc.escape_bytea(b"\x00") == b"\\000": + rv = bytes(rv).replace(b"\\", b"\\\\") + + return rv + + +@cython.final +cdef class BytesBinaryDumper(CDumper): + + format = PQ_BINARY + oid = oids.BYTEA_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *src + cdef Py_ssize_t size; + _buffer_as_string_and_size(obj, &src, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size + + +@cython.final +cdef class ByteaLoader(CLoader): + + format = PQ_TEXT + + cdef object cload(self, const char *data, size_t length): + cdef size_t len_out + cdef unsigned char *out = libpq.PQunescapeBytea( + <const unsigned char *>data, &len_out) + if out is NULL: + raise MemoryError( + f"couldn't allocate for unescape_bytea of {len(data)} bytes" + ) + + rv = out[:len_out] + libpq.PQfreemem(out) + return rv + + +@cython.final +cdef class ByteaBinaryLoader(CLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return data[:length] diff --git a/psycopg_c/psycopg_c/version.py b/psycopg_c/psycopg_c/version.py new file mode 100644 index 0000000..5c989c2 --- /dev/null +++ b/psycopg_c/psycopg_c/version.py @@ -0,0 +1,11 @@ +""" +psycopg-c distribution version file. +""" + +# Copyright (C) 2020 The Psycopg Team + +# Use a versioning scheme as defined in +# https://www.python.org/dev/peps/pep-0440/ +__version__ = "3.1.7" + +# also change psycopg/psycopg/version.py accordingly. diff --git a/psycopg_c/pyproject.toml b/psycopg_c/pyproject.toml new file mode 100644 index 0000000..f0d7a3f --- /dev/null +++ b/psycopg_c/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=49.2.0", "wheel>=0.37", "Cython>=3.0.0a11"] +build-backend = "setuptools.build_meta" diff --git a/psycopg_c/setup.cfg b/psycopg_c/setup.cfg new file mode 100644 index 0000000..6c5c93c --- /dev/null +++ b/psycopg_c/setup.cfg @@ -0,0 +1,57 @@ +[metadata] +name = psycopg-c +description = PostgreSQL database adapter for Python -- C optimisation distribution +url = https://psycopg.org/psycopg3/ +author = Daniele Varrazzo +author_email = daniele.varrazzo@gmail.com +license = GNU Lesser General Public License v3 (LGPLv3) + +project_urls = + Homepage = https://psycopg.org/ + Code = https://github.com/psycopg/psycopg + Issue Tracker = https://github.com/psycopg/psycopg/issues + Download = https://pypi.org/project/psycopg-c/ + +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) + Operating System :: MacOS :: MacOS X + Operating System :: Microsoft :: Windows + Operating System :: POSIX + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Topic :: Database + Topic :: Database :: Front-Ends + Topic :: Software Development + Topic :: Software Development :: Libraries :: Python Modules + +long_description = file: README.rst +long_description_content_type = text/x-rst +license_files = LICENSE.txt + +[options] +python_requires = >= 3.7 +setup_requires = Cython >= 3.0.0a11 +packages = find: 
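+# Cython (setup_requires above) is only needed when building from the .pyx
+# sources: the sdist ships the generated .c files instead (see the note
+# below and setup.py).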
+zip_safe = False
+
+[options.package_data]
+# NOTE: do not include .pyx files: they shouldn't be in the sdist
+# package, so that the build is only performed from the .c files (which
+# are distributed instead).
+psycopg_c =
+    py.typed
+    *.pyi
+    *.pxd
+    _psycopg/*.pxd
+    pq/*.pxd
+
+# In the psycopg-binary distribution don't include cython-related files.
+psycopg_binary =
+    py.typed
+    *.pyi
diff --git a/psycopg_c/setup.py b/psycopg_c/setup.py
new file mode 100644
index 0000000..c6da3a1
--- /dev/null
+++ b/psycopg_c/setup.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""
+PostgreSQL database adapter for Python - optimisation package
+"""
+
+# Copyright (C) 2020 The Psycopg Team
+
+import os
+import re
+import sys
+import subprocess as sp
+
+from setuptools import setup, Extension
+from distutils.command.build_ext import build_ext
+from distutils import log
+
+# Move to the directory of setup.py: executing this file from another
+# location (e.g. from the project root) will fail.
+here = os.path.abspath(os.path.dirname(__file__))
+if os.path.abspath(os.getcwd()) != here:
+    os.chdir(here)
+
+with open("psycopg_c/version.py") as f:
+    data = f.read()
+    m = re.search(r"""(?m)^__version__\s*=\s*['"]([^'"]+)['"]""", data)
+    if m is None:
+        raise Exception(f"cannot find version in {f.name}")
+    version = m.group(1)
+
+
+def get_config(what: str) -> str:
+    pg_config = "pg_config"
+    try:
+        out = sp.run([pg_config, f"--{what}"], stdout=sp.PIPE, check=True)
+    except Exception as e:
+        log.error(f"couldn't run {pg_config!r} --{what}: %s", e)
+        raise
+    else:
+        return out.stdout.strip().decode()
+
+
+class psycopg_build_ext(build_ext):
+    def finalize_options(self) -> None:
+        self._setup_ext_build()
+        super().finalize_options()
+
+    def _setup_ext_build(self) -> None:
+        cythonize = None
+
+        # In the sdist there are no .pyx files, only .c, so we don't need
+        # Cython. Otherwise Cython is a requirement, and it is used to
+        # compile the pyx files to c.
+        if os.path.exists("psycopg_c/_psycopg.pyx"):
+            from Cython.Build import cythonize
+
+        # Add the include and lib dirs for the libpq.
+        includedir = get_config("includedir")
+        libdir = get_config("libdir")
+        for ext in self.distribution.ext_modules:
+            ext.include_dirs.append(includedir)
+            ext.library_dirs.append(libdir)
+
+            if sys.platform == "win32":
+                # For __imp_htons and others
+                ext.libraries.append("ws2_32")
+
+        if cythonize is not None:
+            for ext in self.distribution.ext_modules:
+                for i in range(len(ext.sources)):
+                    base, fext = os.path.splitext(ext.sources[i])
+                    if fext == ".c" and os.path.exists(base + ".pyx"):
+                        ext.sources[i] = base + ".pyx"
+
+            self.distribution.ext_modules = cythonize(
+                self.distribution.ext_modules,
+                language_level=3,
+                compiler_directives={
+                    "always_allow_keywords": False,
+                },
+                annotate=False,  # enable to get an html view of the C module
+            )
+        else:
+            self.distribution.ext_modules = [pgext, pqext]
+
+
+# MSVC requires an explicit "libpq"
+libpq = "pq" if sys.platform != "win32" else "libpq"
+
+# Some details are missing here; they are finished by
+# psycopg_build_ext.finalize_options
+pgext = Extension(
+    "psycopg_c._psycopg",
+    [
+        "psycopg_c/_psycopg.c",
+        "psycopg_c/types/numutils.c",
+    ],
+    libraries=[libpq],
+    include_dirs=[],
+)
+
+pqext = Extension(
+    "psycopg_c.pq",
+    ["psycopg_c/pq.c"],
+    libraries=[libpq],
+    include_dirs=[],
+)
+
+setup(
+    version=version,
+    ext_modules=[pgext, pqext],
+    cmdclass={"build_ext": psycopg_build_ext},
+)
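As a companion to the dumpers above, here is a minimal pure-Python sketch of
the binary NUMERIC wire format they produce. It is illustrative only: the
function name is made up, it trims the digits rather than over-allocating as
dump_int_to_numeric_binary does, and it skips the Decimal-specific handling
(exponent alignment, dscale, NaN/inf); the sign constants have the values
used by PostgreSQL's numeric format.

import struct

NUMERIC_POS = 0x0000
NUMERIC_NEG = 0x4000

def int_to_numeric_binary(value: int) -> bytes:
    # Header: ndigits, weight, sign, dscale (16-bit, big-endian), followed
    # by the base-10000 digits, most significant first; dscale is 0 for
    # integers and weight is the power of 10000 of the leading digit.
    sign = NUMERIC_POS
    if value < 0:
        sign, value = NUMERIC_NEG, -value
    digits = []
    while True:
        value, rem = divmod(value, 10000)
        digits.append(rem)
        if not value:
            break
    digits.reverse()
    return struct.pack(
        f"!HhHH{len(digits)}H", len(digits), len(digits) - 1, sign, 0, *digits
    )

# 1234567 = 123 * 10000**1 + 4567 * 10000**0, hence two digits and weight 1.
assert int_to_numeric_binary(1234567) == struct.pack(
    "!HhHH2H", 2, 1, NUMERIC_POS, 0, 123, 4567
)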