diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/jit | |
parent | Initial commit. (diff) | |
download | postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip |
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/jit')
-rw-r--r-- | src/backend/jit/Makefile | 23 | ||||
-rw-r--r-- | src/backend/jit/README | 295 | ||||
-rw-r--r-- | src/backend/jit/jit.c | 208 | ||||
-rw-r--r-- | src/backend/jit/llvm/Makefile | 76 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit.c | 1250 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_deform.c | 756 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_error.cpp | 176 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_expr.c | 2520 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_inline.cpp | 899 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_types.c | 139 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_wrap.cpp | 78 |
11 files changed, 6420 insertions, 0 deletions
diff --git a/src/backend/jit/Makefile b/src/backend/jit/Makefile new file mode 100644 index 0000000..a895eba --- /dev/null +++ b/src/backend/jit/Makefile @@ -0,0 +1,23 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for JIT code that's provider independent. +# +# Note that the LLVM JIT provider is recursed into by src/Makefile, +# not from here. +# +# IDENTIFICATION +# src/backend/jit/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/jit +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS += -DDLSUFFIX=\"$(DLSUFFIX)\" + +OBJS = \ + jit.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/jit/README b/src/backend/jit/README new file mode 100644 index 0000000..5427bdf --- /dev/null +++ b/src/backend/jit/README @@ -0,0 +1,295 @@ +What is Just-in-Time Compilation? +================================= + +Just-in-Time compilation (JIT) is the process of turning some form of +interpreted program evaluation into a native program, and doing so at +runtime. + +For example, instead of using a facility that can evaluate arbitrary +SQL expressions to evaluate an SQL predicate like WHERE a.col = 3, it +is possible to generate a function that can be natively executed by +the CPU that just handles that expression, yielding a speedup. + +This is JIT, rather than ahead-of-time (AOT) compilation, because it +is done at query execution time, and perhaps only in cases where the +relevant task is repeated a number of times. Given the way JIT +compilation is used in PostgreSQL, the lines between interpretation, +AOT and JIT are somewhat blurry. + +Note that the interpreted program turned into a native program does +not necessarily have to be a program in the classical sense. E.g. 
it +is highly beneficial to JIT compile tuple deforming into a native +function just handling a specific type of table, despite tuple +deforming not commonly being understood as a "program". + + +Why JIT? +======== + +Parts of PostgreSQL are commonly bottlenecked by comparatively small +pieces of CPU intensive code. In a number of cases that is because the +relevant code has to be very generic (e.g. handling arbitrary SQL +level expressions, over arbitrary tables, with arbitrary extensions +installed). This often leads to a large number of indirect jumps and +unpredictable branches, and generally a high number of instructions +for a given task. E.g. just evaluating an expression comparing a +column in a database to an integer ends up needing several hundred +cycles. + +By generating native code large numbers of indirect jumps can be +removed by either making them into direct branches (e.g. replacing the +indirect call to an SQL operator's implementation with a direct call +to that function), or by removing it entirely (e.g. by evaluating the +branch at compile time because the input is constant). Similarly a lot +of branches can be entirely removed (e.g. by again evaluating the +branch at compile time because the input is constant). The latter is +particularly beneficial for removing branches during tuple deforming. + + +How to JIT +========== + +PostgreSQL, by default, uses LLVM to perform JIT. LLVM was chosen +because it is developed by several large corporations and therefore +unlikely to be discontinued, because it has a license compatible with +PostgreSQL, and because its IR can be generated from C using the Clang +compiler. + + +Shared Library Separation +------------------------- + +To avoid the main PostgreSQL binary directly depending on LLVM, which +would prevent LLVM support being independently installed by OS package +managers, the LLVM dependent code is located in a shared library that +is loaded on-demand. 
+ +An additional benefit of doing so is that it is relatively easy to +evaluate JIT compilation that does not use LLVM, by changing out the +shared library used to provide JIT compilation. + +To achieve this, code intending to perform JIT (e.g. expression evaluation) +calls an LLVM independent wrapper located in jit.c to do so. If the +shared library providing JIT support can be loaded (i.e. PostgreSQL was +compiled with LLVM support and the shared library is installed), the task +of JIT compiling an expression gets handed off to the shared library. This +obviously requires that the function in jit.c is allowed to fail in case +no JIT provider can be loaded. + +Which shared library is loaded is determined by the jit_provider GUC, +defaulting to "llvmjit". + +Cloistering code performing JIT into a shared library unfortunately +also means that code doing JIT compilation for various parts of code +has to be located separately from the code doing so without +JIT. E.g. the JIT version of execExprInterp.c is located in jit/llvm/ +rather than executor/. + + +JIT Context +----------- + +For performance and convenience reasons it is useful to allow JITed +functions to be emitted and deallocated together. It is e.g. very +common to create a number of functions at query initialization time, +use them during query execution, and then deallocate all of them +together at the end of the query. + +Lifetimes of JITed functions are managed via JITContext. Exactly one +such context should be created for work in which all created JITed +function should have the same lifetime. E.g. there's exactly one +JITContext for each query executed, in the query's EState. Only the +release of a JITContext is exposed to the provider independent +facility, as the creation of one is done on-demand by the JIT +implementations. 
+ +Emitting individual functions separately is more expensive than +emitting several functions at once, and emitting them together can +provide additional optimization opportunities. To facilitate that, the +LLVM provider separates defining functions from optimizing and +emitting functions in an executable manner. + +Creating functions into the current mutable module (a module +essentially is LLVM's equivalent of a translation unit in C) is done +using + extern LLVMModuleRef llvm_mutable_module(LLVMJitContext *context); +in which it then can emit as much code using the LLVM APIs as it +wants. Whenever a function actually needs to be called + extern void *llvm_get_function(LLVMJitContext *context, const char *funcname); +returns a pointer to it. + +E.g. in the expression evaluation case this setup allows most +functions in a query to be emitted during ExecInitNode(), delaying the +function emission to the first time a function is actually +used. + + +Error Handling +-------------- + +There are two aspects of error handling. Firstly, generated (LLVM IR) +and emitted functions (mmap()ed segments) need to be cleaned up both +after a successful query execution and after an error. This is done by +registering each created JITContext with the current resource owner, +and cleaning it up on error / end of transaction. If it is desirable +to release resources earlier, jit_release_context() can be used. + +The second, less pretty, aspect of error handling is OOM handling +inside LLVM itself. The above resowner based mechanism takes care of +cleaning up emitted code upon ERROR, but there's also the chance that +LLVM itself runs out of memory. LLVM by default does *not* use any C++ +exceptions. Its allocations are primarily funneled through the +standard "new" handlers, and some direct use of malloc() and +mmap(). 
For the former a 'new handler' exists: +http://en.cppreference.com/w/cpp/memory/new/set_new_handler +For the latter LLVM provides callbacks that get called upon failure +(unfortunately mmap() failures are treated as fatal rather than OOM errors). +What we've chosen to do for now is have two functions that LLVM using code +must use: +extern void llvm_enter_fatal_on_oom(void); +extern void llvm_leave_fatal_on_oom(void); +before interacting with LLVM code. + +When a libstdc++ new or LLVM error occurs, the handlers set up by the +above functions trigger a FATAL error. We have to use FATAL rather +than ERROR, as we *cannot* reliably throw ERROR inside a foreign +library without risking corrupting its internal state. + +Users of the above sections do *not* have to use PG_TRY/CATCH blocks, +the handlers instead are reset on toplevel sigsetjmp() level. + +Using a relatively small enter/leave protected section of code, rather +than setting up these handlers globally, avoids negative interactions +with extensions that might use C++ such as PostGIS. As LLVM code +generation should never execute arbitrary code, just setting these +handlers temporarily ought to suffice. + + +Type Synchronization +-------------------- + +To be able to generate code that can perform tasks done by "interpreted" +PostgreSQL, it obviously is required that code generation knows about at +least a few PostgreSQL types. While it is possible to inform LLVM about +type definitions by recreating them manually in C code, that is failure +prone and labor intensive. + +Instead there is one small file (llvmjit_types.c) which references each of +the types required for JITing. That file is translated to bitcode at +compile time, and loaded when LLVM is initialized in a backend. + +That works very well to synchronize the type definition, but unfortunately +it does *not* synchronize offsets as the IR level representation doesn't +know field names. 
Instead, required offsets are maintained as defines in +the original struct definition, like so: +#define FIELDNO_TUPLETABLESLOT_NVALID 9 + int tts_nvalid; /* # of valid values in tts_values */ +While that still needs to be defined, it's only required for a +relatively small number of fields, and it's bunched together with the +struct definition, so it's easily kept synchronized. + + +Inlining +-------- + +One big advantage of JITing expressions is that it can significantly +reduce the overhead of PostgreSQL's extensible function/operator +mechanism, by inlining the body of called functions/operators. + +It obviously is undesirable to maintain a second implementation of +commonly used functions, just for inlining purposes. Instead we take +advantage of the fact that the Clang compiler can emit LLVM IR. + +The ability to do so allows us to get the LLVM IR for all operators +(e.g. int8eq, float8pl etc), without maintaining two copies. These +bitcode files get installed into the server's + $pkglibdir/bitcode/postgres/ +Using existing LLVM functionality (for parallel LTO compilation), +additionally an index over these is stored to +$pkglibdir/bitcode/postgres.index.bc + +Similarly extensions can install code into + $pkglibdir/bitcode/[extension]/ +accompanied by + $pkglibdir/bitcode/[extension].index.bc + +just alongside the actual library. An extension's index will be used +to look up symbols when located in the corresponding shared +library. Symbols that are used inside the extension, when inlined, +will be first looked up in the main binary and then the extension's. + + +Caching +------- + +Currently it is not yet possible to cache generated functions, even +though that'd be desirable from a performance point of view. The +problem is that the generated functions commonly contain pointers into +per-execution memory. The expression evaluation machinery needs to +be redesigned a bit to avoid that. 
Basically all per-execution memory +needs to be referenced as an offset to one block of memory stored in +an ExprState, rather than absolute pointers into memory. + +Once that is addressed, adding an LRU cache that's keyed by the +generated LLVM IR will allow the usage of optimized functions even for +faster queries. + +A longer term project is to move expression compilation to the planner +stage, allowing e.g. to tie compiled expressions to prepared +statements. + +An even more advanced approach would be to use JIT with few +optimizations initially, and build an optimized version in the +background. But that's even further off. + + +What to JIT +=========== + +Currently expression evaluation and tuple deforming are JITed. Those +were chosen because they commonly are major CPU bottlenecks in +analytics queries, but are by no means the only potentially beneficial cases. + +For JITing to be beneficial a piece of code first and foremost has to +be a CPU bottleneck. But also importantly, JITing can only be +beneficial if overhead can be removed by doing so. E.g. in the tuple +deforming case the knowledge about the number of columns and their +types can remove a significant number of branches, and in the +expression evaluation case a lot of indirect jumps/calls can be +removed. If neither of these is the case, JITing is a waste of +resources. + +Future avenues for JITing are tuple sorting, COPY parsing/output +generation, and later compiling larger parts of queries. + + +When to JIT +=========== + +Currently there are a number of GUCs that influence JITing: + +- jit_above_cost = -1, 0-DBL_MAX - all queries with a higher total cost + get JITed, *without* optimization (expensive part), corresponding to + -O0. This commonly already results in significant speedups if + expression/deforming is a bottleneck (removing dynamic branches + mostly). +- jit_optimize_above_cost = -1, 0-DBL_MAX - all queries with a higher total cost + get JITed, *with* optimization (expensive part). 
+- jit_inline_above_cost = -1, 0-DBL_MAX - inlining is tried if query has + higher cost. + +Whenever a query's total cost is above these limits, JITing is +performed. + +Alternative costing models, e.g. by generating separate paths for +parts of a query with lower cpu_* costs, are also a possibility, but +it's doubtful the overhead of doing so is sufficient. Another +alternative would be to count the number of times individual +expressions are estimated to be evaluated, and perform JITing of these +individual expressions. + +The obvious seeming approach of JITing expressions individually after +a number of executions turns out not to work too well. Primarily +because emitting many small functions individually has significant +overhead. Secondarily because the time until JITing occurs causes +relative slowdowns that eat into the gain of JIT compilation. diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c new file mode 100644 index 0000000..2da300e --- /dev/null +++ b/src/backend/jit/jit.c @@ -0,0 +1,208 @@ +/*------------------------------------------------------------------------- + * + * jit.c + * Provider independent JIT infrastructure. + * + * Code related to loading JIT providers, redirecting calls into JIT providers + * and error handling. No code specific to a specific JIT implementation + * should end up here. 
+ * + * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/jit.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "executor/execExpr.h" +#include "fmgr.h" +#include "jit/jit.h" +#include "miscadmin.h" +#include "utils/fmgrprotos.h" +#include "utils/resowner_private.h" + +/* GUCs */ +bool jit_enabled = true; +char *jit_provider = NULL; +bool jit_debugging_support = false; +bool jit_dump_bitcode = false; +bool jit_expressions = true; +bool jit_profiling_support = false; +bool jit_tuple_deforming = true; +double jit_above_cost = 100000; +double jit_inline_above_cost = 500000; +double jit_optimize_above_cost = 500000; + +static JitProviderCallbacks provider; +static bool provider_successfully_loaded = false; +static bool provider_failed_loading = false; + + +static bool provider_init(void); +static bool file_exists(const char *name); + + +/* + * SQL level function returning whether JIT is available in the current + * backend. Will attempt to load JIT provider if necessary. + */ +Datum +pg_jit_available(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(provider_init()); +} + + +/* + * Return whether a JIT provider has successfully been loaded, caching the + * result. + */ +static bool +provider_init(void) +{ + char path[MAXPGPATH]; + JitProviderInit init; + + /* don't even try to load if not enabled */ + if (!jit_enabled) + return false; + + /* + * Don't retry loading after failing - attempting to load JIT provider + * isn't cheap. + */ + if (provider_failed_loading) + return false; + if (provider_successfully_loaded) + return true; + + /* + * Check whether shared library exists. We do that check before actually + * attempting to load the shared library (via load_external_function()), + * because that'd error out in case the shlib isn't available. 
+ */ + snprintf(path, MAXPGPATH, "%s/%s%s", pkglib_path, jit_provider, DLSUFFIX); + elog(DEBUG1, "probing availability of JIT provider at %s", path); + if (!file_exists(path)) + { + elog(DEBUG1, + "provider not available, disabling JIT for current session"); + provider_failed_loading = true; + return false; + } + + /* + * If loading functions fails, signal failure. We do so because + * load_external_function() might error out despite the above check if + * e.g. the library's dependencies aren't installed. We want to signal + * ERROR in that case, so the user is notified, but we don't want to + * continually retry. + */ + provider_failed_loading = true; + + /* and initialize */ + init = (JitProviderInit) + load_external_function(path, "_PG_jit_provider_init", true, NULL); + init(&provider); + + provider_successfully_loaded = true; + provider_failed_loading = false; + + elog(DEBUG1, "successfully loaded JIT provider in current session"); + + return true; +} + +/* + * Reset JIT provider's error handling. This'll be called after an error has + * been thrown and the main-loop has re-established control. + */ +void +jit_reset_after_error(void) +{ + if (provider_successfully_loaded) + provider.reset_after_error(); +} + +/* + * Release resources required by one JIT context. + */ +void +jit_release_context(JitContext *context) +{ + if (provider_successfully_loaded) + provider.release_context(context); + + ResourceOwnerForgetJIT(context->resowner, PointerGetDatum(context)); + pfree(context); +} + +/* + * Ask provider to JIT compile an expression. + * + * Returns true if successful, false if not. + */ +bool +jit_compile_expr(struct ExprState *state) +{ + /* + * We can easily create a one-off context for functions without an + * associated PlanState (and thus EState). But because there's no executor + * shutdown callback that could deallocate the created function, they'd + * live to the end of the transactions, where they'd be cleaned up by the + * resowner machinery. 
That can lead to a noticeable amount of memory + * usage, and worse, trigger some quadratic behaviour in gdb. Therefore, + * at least for now, don't create a JITed function in those circumstances. + */ + if (!state->parent) + return false; + + /* if no jitting should be performed at all */ + if (!(state->parent->state->es_jit_flags & PGJIT_PERFORM)) + return false; + + /* or if expressions aren't JITed */ + if (!(state->parent->state->es_jit_flags & PGJIT_EXPR)) + return false; + + /* this also takes !jit_enabled into account */ + if (provider_init()) + return provider.compile_expr(state); + + return false; +} + +/* Aggregate JIT instrumentation information */ +void +InstrJitAgg(JitInstrumentation *dst, JitInstrumentation *add) +{ + dst->created_functions += add->created_functions; + INSTR_TIME_ADD(dst->generation_counter, add->generation_counter); + INSTR_TIME_ADD(dst->inlining_counter, add->inlining_counter); + INSTR_TIME_ADD(dst->optimization_counter, add->optimization_counter); + INSTR_TIME_ADD(dst->emission_counter, add->emission_counter); +} + +static bool +file_exists(const char *name) +{ + struct stat st; + + AssertArg(name != NULL); + + if (stat(name, &st) == 0) + return S_ISDIR(st.st_mode) ? false : true; + else if (!(errno == ENOENT || errno == ENOTDIR)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not access file \"%s\": %m", name))); + + return false; +} diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile new file mode 100644 index 0000000..2da122a --- /dev/null +++ b/src/backend/jit/llvm/Makefile @@ -0,0 +1,76 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile the LLVM JIT provider, building it into a shared library. +# +# Note that this file is recursed into from src/Makefile, not by the +# parent directory.. 
+# +# IDENTIFICATION +# src/backend/jit/llvm/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/jit/llvm +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +ifneq ($(with_llvm), yes) + $(error "not building with LLVM support") +endif + +PGFILEDESC = "llvmjit - JIT using LLVM" +NAME = llvmjit + +# LLVM 14 produces deprecation warnings. We'll need to make some changes +# before the relevant functions are removed, but for now silence the warnings. +ifeq ($(GCC), yes) +LLVM_CFLAGS += -Wno-deprecated-declarations +endif + +# All files in this directory use LLVM. +CFLAGS += $(LLVM_CFLAGS) +CXXFLAGS += $(LLVM_CXXFLAGS) +override CPPFLAGS := $(LLVM_CPPFLAGS) $(CPPFLAGS) +SHLIB_LINK += $(LLVM_LIBS) + +# Because this module includes C++ files, we need to use a C++ +# compiler for linking. Makefile.shlib uses $(COMPILER) to build +# loadable modules. +override COMPILER = $(CXX) $(CFLAGS) + +OBJS = \ + $(WIN32RES) + +# Infrastructure +OBJS += \ + llvmjit.o \ + llvmjit_error.o \ + llvmjit_inline.o \ + llvmjit_wrap.o + +# Code generation +OBJS += \ + llvmjit_deform.o \ + llvmjit_expr.o + +all: all-shared-lib llvmjit_types.bc + +install: all installdirs install-lib install-types + +installdirs: installdirs-lib + +uninstall: uninstall-lib uninstall-types + +# Note this is intentionally not in bitcodedir, as it's not for inlining */ +install-types: llvmjit_types.bc + $(INSTALL_DATA) llvmjit_types.bc '$(DESTDIR)$(pkglibdir)' + +uninstall-types: + rm -f '$(DESTDIR)$(pkglibdir)/llvmjit_types.bc' + +include $(top_srcdir)/src/Makefile.shlib + +clean distclean maintainer-clean: clean-lib + rm -f $(OBJS) + rm -f llvmjit_types.bc diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c new file mode 100644 index 0000000..fb29449 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit.c @@ -0,0 +1,1250 @@ +/*------------------------------------------------------------------------- + * + * 
llvmjit.c + * Core part of the LLVM JIT provider. + * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <llvm-c/Analysis.h> +#include <llvm-c/BitReader.h> +#include <llvm-c/BitWriter.h> +#include <llvm-c/Core.h> +#include <llvm-c/ExecutionEngine.h> +#if LLVM_VERSION_MAJOR > 11 +#include <llvm-c/Orc.h> +#include <llvm-c/OrcEE.h> +#include <llvm-c/LLJIT.h> +#else +#include <llvm-c/OrcBindings.h> +#endif +#include <llvm-c/Support.h> +#include <llvm-c/Target.h> +#include <llvm-c/Transforms/IPO.h> +#include <llvm-c/Transforms/PassManagerBuilder.h> +#include <llvm-c/Transforms/Scalar.h> +#if LLVM_VERSION_MAJOR > 6 +#include <llvm-c/Transforms/Utils.h> +#endif + +#include "jit/llvmjit.h" +#include "jit/llvmjit_emit.h" +#include "miscadmin.h" +#include "portability/instr_time.h" +#include "storage/ipc.h" +#include "utils/memutils.h" +#include "utils/resowner_private.h" + +/* Handle of a module emitted via ORC JIT */ +typedef struct LLVMJitHandle +{ +#if LLVM_VERSION_MAJOR > 11 + LLVMOrcLLJITRef lljit; + LLVMOrcResourceTrackerRef resource_tracker; +#else + LLVMOrcJITStackRef stack; + LLVMOrcModuleHandle orc_handle; +#endif +} LLVMJitHandle; + + +/* types & functions commonly needed for JITing */ +LLVMTypeRef TypeSizeT; +LLVMTypeRef TypeParamBool; +LLVMTypeRef TypeStorageBool; +LLVMTypeRef TypePGFunction; +LLVMTypeRef StructNullableDatum; +LLVMTypeRef StructHeapTupleFieldsField3; +LLVMTypeRef StructHeapTupleFields; +LLVMTypeRef StructHeapTupleHeaderData; +LLVMTypeRef StructHeapTupleDataChoice; +LLVMTypeRef StructHeapTupleData; +LLVMTypeRef StructMinimalTupleData; +LLVMTypeRef StructItemPointerData; +LLVMTypeRef StructBlockId; +LLVMTypeRef StructFormPgAttribute; +LLVMTypeRef StructTupleConstr; +LLVMTypeRef StructTupleDescData; +LLVMTypeRef StructTupleTableSlot; +LLVMTypeRef 
StructHeapTupleTableSlot; +LLVMTypeRef StructMinimalTupleTableSlot; +LLVMTypeRef StructMemoryContextData; +LLVMTypeRef StructPGFinfoRecord; +LLVMTypeRef StructFmgrInfo; +LLVMTypeRef StructFunctionCallInfoData; +LLVMTypeRef StructExprContext; +LLVMTypeRef StructExprEvalStep; +LLVMTypeRef StructExprState; +LLVMTypeRef StructAggState; +LLVMTypeRef StructAggStatePerGroupData; +LLVMTypeRef StructAggStatePerTransData; + +LLVMValueRef AttributeTemplate; + +LLVMModuleRef llvm_types_module = NULL; + +static bool llvm_session_initialized = false; +static size_t llvm_generation = 0; +static const char *llvm_triple = NULL; +static const char *llvm_layout = NULL; + + +static LLVMTargetRef llvm_targetref; +#if LLVM_VERSION_MAJOR > 11 +static LLVMOrcThreadSafeContextRef llvm_ts_context; +static LLVMOrcLLJITRef llvm_opt0_orc; +static LLVMOrcLLJITRef llvm_opt3_orc; +#else /* LLVM_VERSION_MAJOR > 11 */ +static LLVMOrcJITStackRef llvm_opt0_orc; +static LLVMOrcJITStackRef llvm_opt3_orc; +#endif /* LLVM_VERSION_MAJOR > 11 */ + + +static void llvm_release_context(JitContext *context); +static void llvm_session_initialize(void); +static void llvm_shutdown(int code, Datum arg); +static void llvm_compile_module(LLVMJitContext *context); +static void llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module); + +static void llvm_create_types(void); +static uint64_t llvm_resolve_symbol(const char *name, void *ctx); + +#if LLVM_VERSION_MAJOR > 11 +static LLVMOrcLLJITRef llvm_create_jit_instance(LLVMTargetMachineRef tm); +static char *llvm_error_message(LLVMErrorRef error); +#endif /* LLVM_VERSION_MAJOR > 11 */ + +PG_MODULE_MAGIC; + + +/* + * Initialize LLVM JIT provider. + */ +void +_PG_jit_provider_init(JitProviderCallbacks *cb) +{ + cb->reset_after_error = llvm_reset_after_error; + cb->release_context = llvm_release_context; + cb->compile_expr = llvm_compile_expr; +} + +/* + * Create a context for JITing work. 
+ * + * The context, including subsidiary resources, will be cleaned up either when + * the context is explicitly released, or when the lifetime of + * CurrentResourceOwner ends (usually the end of the current [sub]xact). + */ +LLVMJitContext * +llvm_create_context(int jitFlags) +{ + LLVMJitContext *context; + + llvm_assert_in_fatal_section(); + + llvm_session_initialize(); + + ResourceOwnerEnlargeJIT(CurrentResourceOwner); + + context = MemoryContextAllocZero(TopMemoryContext, + sizeof(LLVMJitContext)); + context->base.flags = jitFlags; + + /* ensure cleanup */ + context->base.resowner = CurrentResourceOwner; + ResourceOwnerRememberJIT(CurrentResourceOwner, PointerGetDatum(context)); + + return context; +} + +/* + * Release resources required by one llvm context. + */ +static void +llvm_release_context(JitContext *context) +{ + LLVMJitContext *llvm_context = (LLVMJitContext *) context; + ListCell *lc; + + /* + * When this backend is exiting, don't clean up LLVM. As an error might + * have occurred from within LLVM, we do not want to risk reentering. All + * resource cleanup is going to happen through process exit. + */ + if (proc_exit_inprogress) + return; + + llvm_enter_fatal_on_oom(); + + if (llvm_context->module) + { + LLVMDisposeModule(llvm_context->module); + llvm_context->module = NULL; + } + + foreach(lc, llvm_context->handles) + { + LLVMJitHandle *jit_handle = (LLVMJitHandle *) lfirst(lc); + +#if LLVM_VERSION_MAJOR > 11 + { + LLVMOrcExecutionSessionRef ee; + LLVMOrcSymbolStringPoolRef sp; + + LLVMOrcResourceTrackerRemove(jit_handle->resource_tracker); + LLVMOrcReleaseResourceTracker(jit_handle->resource_tracker); + + /* + * Without triggering cleanup of the string pool, we'd leak + * memory. It'd be sufficient to do this far less often, but in + * experiments the required time was small enough to just always + * do it. 
+ */ + ee = LLVMOrcLLJITGetExecutionSession(jit_handle->lljit); + sp = LLVMOrcExecutionSessionGetSymbolStringPool(ee); + LLVMOrcSymbolStringPoolClearDeadEntries(sp); + } +#else /* LLVM_VERSION_MAJOR > 11 */ + { + LLVMOrcRemoveModule(jit_handle->stack, jit_handle->orc_handle); + } +#endif /* LLVM_VERSION_MAJOR > 11 */ + + pfree(jit_handle); + } + list_free(llvm_context->handles); + llvm_context->handles = NIL; +} + +/* + * Return module which may be modified, e.g. by creating new functions. + */ +LLVMModuleRef +llvm_mutable_module(LLVMJitContext *context) +{ + llvm_assert_in_fatal_section(); + + /* + * If there's no in-progress module, create a new one. + */ + if (!context->module) + { + context->compiled = false; + context->module_generation = llvm_generation++; + context->module = LLVMModuleCreateWithName("pg"); + LLVMSetTarget(context->module, llvm_triple); + LLVMSetDataLayout(context->module, llvm_layout); + } + + return context->module; +} + +/* + * Expand function name to be non-conflicting. This should be used by code + * generating code, when adding new externally visible function definitions to + * a Module. + */ +char * +llvm_expand_funcname(struct LLVMJitContext *context, const char *basename) +{ + Assert(context->module != NULL); + + context->base.instr.created_functions++; + + /* + * Previously we used dots to separate, but turns out some tools, e.g. + * GDB, don't like that and truncate name. + */ + return psprintf("%s_%zu_%d", + basename, + context->module_generation, + context->counter++); +} + +/* + * Return pointer to function funcname, which has to exist. If there's pending + * code to be optimized and emitted, do so first. 
+ */ +void * +llvm_get_function(LLVMJitContext *context, const char *funcname) +{ +#if LLVM_VERSION_MAJOR > 11 || \ + defined(HAVE_DECL_LLVMORCGETSYMBOLADDRESSIN) && HAVE_DECL_LLVMORCGETSYMBOLADDRESSIN + ListCell *lc; +#endif + + llvm_assert_in_fatal_section(); + + /* + * If there is a pending / not emitted module, compile and emit now. + * Otherwise we might not find the [correct] function. + */ + if (!context->compiled) + { + llvm_compile_module(context); + } + + /* + * ORC's symbol table is of *unmangled* symbols. Therefore we don't need + * to mangle here. + */ + +#if LLVM_VERSION_MAJOR > 11 + foreach(lc, context->handles) + { + LLVMJitHandle *handle = (LLVMJitHandle *) lfirst(lc); + instr_time starttime; + instr_time endtime; + LLVMErrorRef error; + LLVMOrcJITTargetAddress addr; + + INSTR_TIME_SET_CURRENT(starttime); + + addr = 0; + error = LLVMOrcLLJITLookup(handle->lljit, &addr, funcname); + if (error) + elog(ERROR, "failed to look up symbol \"%s\": %s", + funcname, llvm_error_message(error)); + + /* + * LLJIT only actually emits code the first time a symbol is + * referenced. Thus add lookup time to emission time. That's counting + * a bit more than with older LLVM versions, but unlikely to ever + * matter. 
+ */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.instr.emission_counter, + endtime, starttime); + + if (addr) + return (void *) (uintptr_t) addr; + } +#elif defined(HAVE_DECL_LLVMORCGETSYMBOLADDRESSIN) && HAVE_DECL_LLVMORCGETSYMBOLADDRESSIN + foreach(lc, context->handles) + { + LLVMOrcTargetAddress addr; + LLVMJitHandle *handle = (LLVMJitHandle *) lfirst(lc); + + addr = 0; + if (LLVMOrcGetSymbolAddressIn(handle->stack, &addr, handle->orc_handle, funcname)) + elog(ERROR, "failed to look up symbol \"%s\"", funcname); + if (addr) + return (void *) (uintptr_t) addr; + } +#elif LLVM_VERSION_MAJOR < 5 + { + LLVMOrcTargetAddress addr; + + if ((addr = LLVMOrcGetSymbolAddress(llvm_opt0_orc, funcname))) + return (void *) (uintptr_t) addr; + if ((addr = LLVMOrcGetSymbolAddress(llvm_opt3_orc, funcname))) + return (void *) (uintptr_t) addr; + } +#else + { + LLVMOrcTargetAddress addr; + + if (LLVMOrcGetSymbolAddress(llvm_opt0_orc, &addr, funcname)) + elog(ERROR, "failed to look up symbol \"%s\"", funcname); + if (addr) + return (void *) (uintptr_t) addr; + if (LLVMOrcGetSymbolAddress(llvm_opt3_orc, &addr, funcname)) + elog(ERROR, "failed to look up symbol \"%s\"", funcname); + if (addr) + return (void *) (uintptr_t) addr; + } +#endif + + elog(ERROR, "failed to JIT: %s", funcname); + + return NULL; +} + +/* + * Return type of a variable in llvmjit_types.c. This is useful to keep types + * in sync between plain C and JIT related code. 
+ */ +LLVMTypeRef +llvm_pg_var_type(const char *varname) +{ + LLVMValueRef v_srcvar; + LLVMTypeRef typ; + + /* this'll return a *pointer* to the global */ + v_srcvar = LLVMGetNamedGlobal(llvm_types_module, varname); + if (!v_srcvar) + elog(ERROR, "variable %s not in llvmjit_types.c", varname); + + /* look at the contained type */ + typ = LLVMTypeOf(v_srcvar); + Assert(typ != NULL && LLVMGetTypeKind(typ) == LLVMPointerTypeKind); + typ = LLVMGetElementType(typ); + Assert(typ != NULL); + + return typ; +} + +/* + * Return function type of a variable in llvmjit_types.c. This is useful to + * keep function types in sync between C and JITed code. + */ +LLVMTypeRef +llvm_pg_var_func_type(const char *varname) +{ + LLVMTypeRef typ = llvm_pg_var_type(varname); + + /* look at the contained type */ + Assert(LLVMGetTypeKind(typ) == LLVMPointerTypeKind); + typ = LLVMGetElementType(typ); + Assert(typ != NULL && LLVMGetTypeKind(typ) == LLVMFunctionTypeKind); + + return typ; +} + +/* + * Return declaration for a function referenced in llvmjit_types.c, adding it + * to the module if necessary. + * + * This is used to make functions discovered via llvm_create_types() known to + * the module that's currently being worked on. + */ +LLVMValueRef +llvm_pg_func(LLVMModuleRef mod, const char *funcname) +{ + LLVMValueRef v_srcfn; + LLVMValueRef v_fn; + + /* don't repeatedly add function */ + v_fn = LLVMGetNamedFunction(mod, funcname); + if (v_fn) + return v_fn; + + v_srcfn = LLVMGetNamedFunction(llvm_types_module, funcname); + + if (!v_srcfn) + elog(ERROR, "function %s not in llvmjit_types.c", funcname); + + v_fn = LLVMAddFunction(mod, + funcname, + LLVMGetElementType(LLVMTypeOf(v_srcfn))); + llvm_copy_attributes(v_srcfn, v_fn); + + return v_fn; +} + +/* + * Copy attributes from one function to another, for a specific index (an + * index can reference return value, function and parameter attributes). 
+ */ +static void +llvm_copy_attributes_at_index(LLVMValueRef v_from, LLVMValueRef v_to, uint32 index) +{ + int num_attributes; + LLVMAttributeRef *attrs; + + num_attributes = LLVMGetAttributeCountAtIndexPG(v_from, index); + + /* + * Not just for efficiency: LLVM <= 3.9 crashes when + * LLVMGetAttributesAtIndex() is called for an index with 0 attributes. + */ + if (num_attributes == 0) + return; + + attrs = palloc(sizeof(LLVMAttributeRef) * num_attributes); + LLVMGetAttributesAtIndex(v_from, index, attrs); + + for (int attno = 0; attno < num_attributes; attno++) + LLVMAddAttributeAtIndex(v_to, index, attrs[attno]); + + pfree(attrs); +} + +/* + * Copy all attributes from one function to another. I.e. function, return and + * parameters will be copied. + */ +void +llvm_copy_attributes(LLVMValueRef v_from, LLVMValueRef v_to) +{ + uint32 param_count; + + /* copy function attributes */ + llvm_copy_attributes_at_index(v_from, v_to, LLVMAttributeFunctionIndex); + + /* and the return value attributes */ + llvm_copy_attributes_at_index(v_from, v_to, LLVMAttributeReturnIndex); + + /* and each function parameter's attribute */ + param_count = LLVMCountParams(v_from); + + for (int paramidx = 1; paramidx <= param_count; paramidx++) + llvm_copy_attributes_at_index(v_from, v_to, paramidx); +} + +/* + * Return a callable LLVMValueRef for fcinfo. + */ +LLVMValueRef +llvm_function_reference(LLVMJitContext *context, + LLVMBuilderRef builder, + LLVMModuleRef mod, + FunctionCallInfo fcinfo) +{ + char *modname; + char *basename; + char *funcname; + + LLVMValueRef v_fn; + + fmgr_symbol(fcinfo->flinfo->fn_oid, &modname, &basename); + + if (modname != NULL && basename != NULL) + { + /* external function in loadable library */ + funcname = psprintf("pgextern.%s.%s", modname, basename); + } + else if (basename != NULL) + { + /* internal function */ + funcname = psprintf("%s", basename); + } + else + { + /* + * Function we don't know to handle, return pointer. 
We do so by + * creating a global constant containing a pointer to the function. + * Makes IR more readable. + */ + LLVMValueRef v_fn_addr; + + funcname = psprintf("pgoidextern.%u", + fcinfo->flinfo->fn_oid); + v_fn = LLVMGetNamedGlobal(mod, funcname); + if (v_fn != 0) + return LLVMBuildLoad(builder, v_fn, ""); + + v_fn_addr = l_ptr_const(fcinfo->flinfo->fn_addr, TypePGFunction); + + v_fn = LLVMAddGlobal(mod, TypePGFunction, funcname); + LLVMSetInitializer(v_fn, v_fn_addr); + LLVMSetGlobalConstant(v_fn, true); + LLVMSetLinkage(v_fn, LLVMPrivateLinkage); + LLVMSetUnnamedAddr(v_fn, true); + + return LLVMBuildLoad(builder, v_fn, ""); + } + + /* check if function already has been added */ + v_fn = LLVMGetNamedFunction(mod, funcname); + if (v_fn != 0) + return v_fn; + + v_fn = LLVMAddFunction(mod, funcname, LLVMGetElementType(TypePGFunction)); + + return v_fn; +} + +/* + * Optimize code in module using the flags set in context. + */ +static void +llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module) +{ + LLVMPassManagerBuilderRef llvm_pmb; + LLVMPassManagerRef llvm_mpm; + LLVMPassManagerRef llvm_fpm; + LLVMValueRef func; + int compile_optlevel; + + if (context->base.flags & PGJIT_OPT3) + compile_optlevel = 3; + else + compile_optlevel = 0; + + /* + * Have to create a new pass manager builder every pass through, as the + * inliner has some per-builder state. Otherwise one ends up only inlining + * a function the first time though. 
+ */ + llvm_pmb = LLVMPassManagerBuilderCreate(); + LLVMPassManagerBuilderSetOptLevel(llvm_pmb, compile_optlevel); + llvm_fpm = LLVMCreateFunctionPassManagerForModule(module); + + if (context->base.flags & PGJIT_OPT3) + { + /* TODO: Unscientifically determined threshold */ + LLVMPassManagerBuilderUseInlinerWithThreshold(llvm_pmb, 512); + } + else + { + /* we rely on mem2reg heavily, so emit even in the O0 case */ + LLVMAddPromoteMemoryToRegisterPass(llvm_fpm); + } + + LLVMPassManagerBuilderPopulateFunctionPassManager(llvm_pmb, llvm_fpm); + + /* + * Do function level optimization. This could be moved to the point where + * functions are emitted, to reduce memory usage a bit. + */ + LLVMInitializeFunctionPassManager(llvm_fpm); + for (func = LLVMGetFirstFunction(context->module); + func != NULL; + func = LLVMGetNextFunction(func)) + LLVMRunFunctionPassManager(llvm_fpm, func); + LLVMFinalizeFunctionPassManager(llvm_fpm); + LLVMDisposePassManager(llvm_fpm); + + /* + * Perform module level optimization. We do so even in the non-optimized + * case, so always-inline functions etc get inlined. It's cheap enough. + */ + llvm_mpm = LLVMCreatePassManager(); + LLVMPassManagerBuilderPopulateModulePassManager(llvm_pmb, + llvm_mpm); + /* always use always-inliner pass */ + if (!(context->base.flags & PGJIT_OPT3)) + LLVMAddAlwaysInlinerPass(llvm_mpm); + /* if doing inlining, but no expensive optimization, add inlining pass */ + if (context->base.flags & PGJIT_INLINE + && !(context->base.flags & PGJIT_OPT3)) + LLVMAddFunctionInliningPass(llvm_mpm); + LLVMRunPassManager(llvm_mpm, context->module); + LLVMDisposePassManager(llvm_mpm); + + LLVMPassManagerBuilderDispose(llvm_pmb); +} + +/* + * Emit code for the currently pending module. 
+ */ +static void +llvm_compile_module(LLVMJitContext *context) +{ + LLVMJitHandle *handle; + MemoryContext oldcontext; + instr_time starttime; + instr_time endtime; +#if LLVM_VERSION_MAJOR > 11 + LLVMOrcLLJITRef compile_orc; +#else + LLVMOrcJITStackRef compile_orc; +#endif + + if (context->base.flags & PGJIT_OPT3) + compile_orc = llvm_opt3_orc; + else + compile_orc = llvm_opt0_orc; + + /* perform inlining */ + if (context->base.flags & PGJIT_INLINE) + { + INSTR_TIME_SET_CURRENT(starttime); + llvm_inline(context->module); + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.instr.inlining_counter, + endtime, starttime); + } + + if (jit_dump_bitcode) + { + char *filename; + + filename = psprintf("%u.%zu.bc", + MyProcPid, + context->module_generation); + LLVMWriteBitcodeToFile(context->module, filename); + pfree(filename); + } + + + /* optimize according to the chosen optimization settings */ + INSTR_TIME_SET_CURRENT(starttime); + llvm_optimize_module(context, context->module); + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.instr.optimization_counter, + endtime, starttime); + + if (jit_dump_bitcode) + { + char *filename; + + filename = psprintf("%u.%zu.optimized.bc", + MyProcPid, + context->module_generation); + LLVMWriteBitcodeToFile(context->module, filename); + pfree(filename); + } + + handle = (LLVMJitHandle *) + MemoryContextAlloc(TopMemoryContext, sizeof(LLVMJitHandle)); + + /* + * Emit the code. Note that this can, depending on the optimization + * settings, take noticeable resources as code emission executes low-level + * instruction combining/selection passes etc. Without optimization a + * faster instruction selection mechanism is used. 
+ */ + INSTR_TIME_SET_CURRENT(starttime); +#if LLVM_VERSION_MAJOR > 11 + { + LLVMOrcThreadSafeModuleRef ts_module; + LLVMErrorRef error; + LLVMOrcJITDylibRef jd = LLVMOrcLLJITGetMainJITDylib(compile_orc); + + ts_module = LLVMOrcCreateNewThreadSafeModule(context->module, llvm_ts_context); + + handle->lljit = compile_orc; + handle->resource_tracker = LLVMOrcJITDylibCreateResourceTracker(jd); + + /* + * NB: This doesn't actually emit code. That happens lazily the first + * time a symbol defined in the module is requested. Due to that + * llvm_get_function() also accounts for emission time. + */ + + context->module = NULL; /* will be owned by LLJIT */ + error = LLVMOrcLLJITAddLLVMIRModuleWithRT(compile_orc, + handle->resource_tracker, + ts_module); + + if (error) + elog(ERROR, "failed to JIT module: %s", + llvm_error_message(error)); + + handle->lljit = compile_orc; + + /* LLVMOrcLLJITAddLLVMIRModuleWithRT takes ownership of the module */ + } +#elif LLVM_VERSION_MAJOR > 6 + { + handle->stack = compile_orc; + if (LLVMOrcAddEagerlyCompiledIR(compile_orc, &handle->orc_handle, context->module, + llvm_resolve_symbol, NULL)) + elog(ERROR, "failed to JIT module"); + + /* LLVMOrcAddEagerlyCompiledIR takes ownership of the module */ + } +#elif LLVM_VERSION_MAJOR > 4 + { + LLVMSharedModuleRef smod; + + smod = LLVMOrcMakeSharedModule(context->module); + handle->stack = compile_orc; + if (LLVMOrcAddEagerlyCompiledIR(compile_orc, &handle->orc_handle, smod, + llvm_resolve_symbol, NULL)) + elog(ERROR, "failed to JIT module"); + + LLVMOrcDisposeSharedModuleRef(smod); + } +#else /* LLVM 4.0 and 3.9 */ + { + handle->stack = compile_orc; + handle->orc_handle = LLVMOrcAddEagerlyCompiledIR(compile_orc, context->module, + llvm_resolve_symbol, NULL); + + LLVMDisposeModule(context->module); + } +#endif + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.instr.emission_counter, + endtime, starttime); + + context->module = NULL; + context->compiled = true; + + /* remember 
emitted code for cleanup and lookups */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + context->handles = lappend(context->handles, handle); + MemoryContextSwitchTo(oldcontext); + + ereport(DEBUG1, + (errmsg_internal("time to inline: %.3fs, opt: %.3fs, emit: %.3fs", + INSTR_TIME_GET_DOUBLE(context->base.instr.inlining_counter), + INSTR_TIME_GET_DOUBLE(context->base.instr.optimization_counter), + INSTR_TIME_GET_DOUBLE(context->base.instr.emission_counter)), + errhidestmt(true), + errhidecontext(true))); +} + +/* + * Per session initialization. + */ +static void +llvm_session_initialize(void) +{ + MemoryContext oldcontext; + char *error = NULL; + char *cpu = NULL; + char *features = NULL; + LLVMTargetMachineRef opt0_tm; + LLVMTargetMachineRef opt3_tm; + + if (llvm_session_initialized) + return; + + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + LLVMInitializeNativeTarget(); + LLVMInitializeNativeAsmPrinter(); + LLVMInitializeNativeAsmParser(); + + /* + * Synchronize types early, as that also includes inferring the target + * triple. + */ + llvm_create_types(); + + if (LLVMGetTargetFromTriple(llvm_triple, &llvm_targetref, &error) != 0) + { + elog(FATAL, "failed to query triple %s\n", error); + } + + /* + * We want the generated code to use all available features. Therefore + * grab the host CPU string and detect features of the current CPU. The + * latter is needed because some CPU architectures default to enabling + * features not all CPUs have (weird, huh). 
+ */ + cpu = LLVMGetHostCPUName(); + features = LLVMGetHostCPUFeatures(); + elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"", + cpu, features); + + opt0_tm = + LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features, + LLVMCodeGenLevelNone, + LLVMRelocDefault, + LLVMCodeModelJITDefault); + opt3_tm = + LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features, + LLVMCodeGenLevelAggressive, + LLVMRelocDefault, + LLVMCodeModelJITDefault); + + LLVMDisposeMessage(cpu); + cpu = NULL; + LLVMDisposeMessage(features); + features = NULL; + + /* force symbols in main binary to be loaded */ + LLVMLoadLibraryPermanently(NULL); + +#if LLVM_VERSION_MAJOR > 11 + { + llvm_ts_context = LLVMOrcCreateNewThreadSafeContext(); + + llvm_opt0_orc = llvm_create_jit_instance(opt0_tm); + opt0_tm = 0; + + llvm_opt3_orc = llvm_create_jit_instance(opt3_tm); + opt3_tm = 0; + } +#else /* LLVM_VERSION_MAJOR > 11 */ + { + llvm_opt0_orc = LLVMOrcCreateInstance(opt0_tm); + llvm_opt3_orc = LLVMOrcCreateInstance(opt3_tm); + +#if defined(HAVE_DECL_LLVMCREATEGDBREGISTRATIONLISTENER) && HAVE_DECL_LLVMCREATEGDBREGISTRATIONLISTENER + if (jit_debugging_support) + { + LLVMJITEventListenerRef l = LLVMCreateGDBRegistrationListener(); + + LLVMOrcRegisterJITEventListener(llvm_opt0_orc, l); + LLVMOrcRegisterJITEventListener(llvm_opt3_orc, l); + } +#endif +#if defined(HAVE_DECL_LLVMCREATEPERFJITEVENTLISTENER) && HAVE_DECL_LLVMCREATEPERFJITEVENTLISTENER + if (jit_profiling_support) + { + LLVMJITEventListenerRef l = LLVMCreatePerfJITEventListener(); + + LLVMOrcRegisterJITEventListener(llvm_opt0_orc, l); + LLVMOrcRegisterJITEventListener(llvm_opt3_orc, l); + } +#endif + } +#endif /* LLVM_VERSION_MAJOR > 11 */ + + on_proc_exit(llvm_shutdown, 0); + + llvm_session_initialized = true; + + MemoryContextSwitchTo(oldcontext); +} + +static void +llvm_shutdown(int code, Datum arg) +{ + /* + * If llvm_shutdown() is reached while in a fatal-on-oom section an error + * has occurred in the middle 
of LLVM code. It is not safe to call back + * into LLVM (which is why a FATAL error was thrown). + * + * We do need to shutdown LLVM in other shutdown cases, otherwise + * e.g. profiling data won't be written out. + */ + if (llvm_in_fatal_on_oom()) + { + Assert(proc_exit_inprogress); + return; + } + +#if LLVM_VERSION_MAJOR > 11 + { + if (llvm_opt3_orc) + { + LLVMOrcDisposeLLJIT(llvm_opt3_orc); + llvm_opt3_orc = NULL; + } + if (llvm_opt0_orc) + { + LLVMOrcDisposeLLJIT(llvm_opt0_orc); + llvm_opt0_orc = NULL; + } + if (llvm_ts_context) + { + LLVMOrcDisposeThreadSafeContext(llvm_ts_context); + llvm_ts_context = NULL; + } + } +#else /* LLVM_VERSION_MAJOR > 11 */ + { + /* unregister profiling support, needs to be flushed to be useful */ + + if (llvm_opt3_orc) + { +#if defined(HAVE_DECL_LLVMORCREGISTERPERF) && HAVE_DECL_LLVMORCREGISTERPERF + if (jit_profiling_support) + LLVMOrcUnregisterPerf(llvm_opt3_orc); +#endif + LLVMOrcDisposeInstance(llvm_opt3_orc); + llvm_opt3_orc = NULL; + } + + if (llvm_opt0_orc) + { +#if defined(HAVE_DECL_LLVMORCREGISTERPERF) && HAVE_DECL_LLVMORCREGISTERPERF + if (jit_profiling_support) + LLVMOrcUnregisterPerf(llvm_opt0_orc); +#endif + LLVMOrcDisposeInstance(llvm_opt0_orc); + llvm_opt0_orc = NULL; + } + } +#endif /* LLVM_VERSION_MAJOR > 11 */ +} + +/* helper for llvm_create_types, returning a function's return type */ +static LLVMTypeRef +load_return_type(LLVMModuleRef mod, const char *name) +{ + LLVMValueRef value; + LLVMTypeRef typ; + + /* this'll return a *pointer* to the function */ + value = LLVMGetNamedFunction(mod, name); + if (!value) + elog(ERROR, "function %s is unknown", name); + + /* get type of function pointer */ + typ = LLVMTypeOf(value); + Assert(typ != NULL); + /* dereference pointer */ + typ = LLVMGetElementType(typ); + Assert(typ != NULL); + /* and look at return type */ + typ = LLVMGetReturnType(typ); + Assert(typ != NULL); + + return typ; +} + +/* + * Load required information, types, function signatures from llvmjit_types.c 
+ * and make them available in global variables. + * + * Those global variables are then used while emitting code. + */ +static void +llvm_create_types(void) +{ + char path[MAXPGPATH]; + LLVMMemoryBufferRef buf; + char *msg; + + snprintf(path, MAXPGPATH, "%s/%s", pkglib_path, "llvmjit_types.bc"); + + /* open file */ + if (LLVMCreateMemoryBufferWithContentsOfFile(path, &buf, &msg)) + { + elog(ERROR, "LLVMCreateMemoryBufferWithContentsOfFile(%s) failed: %s", + path, msg); + } + + /* eagerly load contents, going to need it all */ + if (LLVMParseBitcode2(buf, &llvm_types_module)) + { + elog(ERROR, "LLVMParseBitcode2 of %s failed", path); + } + LLVMDisposeMemoryBuffer(buf); + + /* + * Load triple & layout from clang emitted file so we're guaranteed to be + * compatible. + */ + llvm_triple = pstrdup(LLVMGetTarget(llvm_types_module)); + llvm_layout = pstrdup(LLVMGetDataLayoutStr(llvm_types_module)); + + TypeSizeT = llvm_pg_var_type("TypeSizeT"); + TypeParamBool = load_return_type(llvm_types_module, "FunctionReturningBool"); + TypeStorageBool = llvm_pg_var_type("TypeStorageBool"); + TypePGFunction = llvm_pg_var_type("TypePGFunction"); + StructNullableDatum = llvm_pg_var_type("StructNullableDatum"); + StructExprContext = llvm_pg_var_type("StructExprContext"); + StructExprEvalStep = llvm_pg_var_type("StructExprEvalStep"); + StructExprState = llvm_pg_var_type("StructExprState"); + StructFunctionCallInfoData = llvm_pg_var_type("StructFunctionCallInfoData"); + StructMemoryContextData = llvm_pg_var_type("StructMemoryContextData"); + StructTupleTableSlot = llvm_pg_var_type("StructTupleTableSlot"); + StructHeapTupleTableSlot = llvm_pg_var_type("StructHeapTupleTableSlot"); + StructMinimalTupleTableSlot = llvm_pg_var_type("StructMinimalTupleTableSlot"); + StructHeapTupleData = llvm_pg_var_type("StructHeapTupleData"); + StructTupleDescData = llvm_pg_var_type("StructTupleDescData"); + StructAggState = llvm_pg_var_type("StructAggState"); + StructAggStatePerGroupData = 
llvm_pg_var_type("StructAggStatePerGroupData"); + StructAggStatePerTransData = llvm_pg_var_type("StructAggStatePerTransData"); + + AttributeTemplate = LLVMGetNamedFunction(llvm_types_module, "AttributeTemplate"); +} + +/* + * Split a symbol into module / function parts. If the function is in the + * main binary (or an external library) *modname will be NULL. + */ +void +llvm_split_symbol_name(const char *name, char **modname, char **funcname) +{ + *modname = NULL; + *funcname = NULL; + + /* + * Module function names are pgextern.$module.$funcname + */ + if (strncmp(name, "pgextern.", strlen("pgextern.")) == 0) + { + /* + * Symbol names cannot contain a ., therefore we can split based on + * first and last occurrence of one. + */ + *funcname = rindex(name, '.'); + (*funcname)++; /* jump over . */ + + *modname = pnstrdup(name + strlen("pgextern."), + *funcname - name - strlen("pgextern.") - 1); + Assert(funcname); + + *funcname = pstrdup(*funcname); + } + else + { + *modname = NULL; + *funcname = pstrdup(name); + } +} + +/* + * Attempt to resolve symbol, so LLVM can emit a reference to it. + */ +static uint64_t +llvm_resolve_symbol(const char *symname, void *ctx) +{ + uintptr_t addr; + char *funcname; + char *modname; + + /* + * macOS prefixes all object level symbols with an underscore. But neither + * dlsym() nor PG's inliner expect that. So undo. 
+ */ +#if defined(__darwin__) + if (symname[0] != '_') + elog(ERROR, "expected prefixed symbol name, but got \"%s\"", symname); + symname++; +#endif + + llvm_split_symbol_name(symname, &modname, &funcname); + + /* functions that aren't resolved to names shouldn't ever get here */ + Assert(funcname); + + if (modname) + addr = (uintptr_t) load_external_function(modname, funcname, + true, NULL); + else + addr = (uintptr_t) LLVMSearchForAddressOfSymbol(symname); + + pfree(funcname); + if (modname) + pfree(modname); + + /* let LLVM will error out - should never happen */ + if (!addr) + elog(WARNING, "failed to resolve name %s", symname); + + return (uint64_t) addr; +} + +#if LLVM_VERSION_MAJOR > 11 + +static LLVMErrorRef +llvm_resolve_symbols(LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx, + LLVMOrcLookupStateRef * LookupState, LLVMOrcLookupKind Kind, + LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags, + LLVMOrcCLookupSet LookupSet, size_t LookupSetSize) +{ + LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMJITCSymbolMapPair) * LookupSetSize); + LLVMErrorRef error; + LLVMOrcMaterializationUnitRef mu; + + for (int i = 0; i < LookupSetSize; i++) + { + const char *name = LLVMOrcSymbolStringPoolEntryStr(LookupSet[i].Name); + +#if LLVM_VERSION_MAJOR > 12 + LLVMOrcRetainSymbolStringPoolEntry(LookupSet[i].Name); +#endif + symbols[i].Name = LookupSet[i].Name; + symbols[i].Sym.Address = llvm_resolve_symbol(name, NULL); + symbols[i].Sym.Flags.GenericFlags = LLVMJITSymbolGenericFlagsExported; + } + + mu = LLVMOrcAbsoluteSymbols(symbols, LookupSetSize); + error = LLVMOrcJITDylibDefine(JD, mu); + if (error != LLVMErrorSuccess) + LLVMOrcDisposeMaterializationUnit(mu); + + pfree(symbols); + + return error; +} + +/* + * We cannot throw errors through LLVM (without causing a FATAL at least), so + * just use WARNING here. 
That's OK anyway, as the error is also reported at + * the top level action (with less detail) and there might be multiple + * invocations of errors with details. + * + * This doesn't really happen during normal operation, but in cases like + * symbol resolution breakage. So just using elog(WARNING) is fine. + */ +static void +llvm_log_jit_error(void *ctx, LLVMErrorRef error) +{ + elog(WARNING, "error during JITing: %s", + llvm_error_message(error)); +} + +/* + * Create our own object layer, so we can add event listeners. + */ +static LLVMOrcObjectLayerRef +llvm_create_object_layer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple) +{ + LLVMOrcObjectLayerRef objlayer = + LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(ES); + +#if defined(HAVE_DECL_LLVMCREATEGDBREGISTRATIONLISTENER) && HAVE_DECL_LLVMCREATEGDBREGISTRATIONLISTENER + if (jit_debugging_support) + { + LLVMJITEventListenerRef l = LLVMCreateGDBRegistrationListener(); + + LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(objlayer, l); + } +#endif + +#if defined(HAVE_DECL_LLVMCREATEPERFJITEVENTLISTENER) && HAVE_DECL_LLVMCREATEPERFJITEVENTLISTENER + if (jit_profiling_support) + { + LLVMJITEventListenerRef l = LLVMCreatePerfJITEventListener(); + + LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(objlayer, l); + } +#endif + + return objlayer; +} + +/* + * Create LLJIT instance, using the passed in target machine. Note that the + * target machine afterwards is owned by the LLJIT instance. 
+ */ +static LLVMOrcLLJITRef +llvm_create_jit_instance(LLVMTargetMachineRef tm) +{ + LLVMOrcLLJITRef lljit; + LLVMOrcJITTargetMachineBuilderRef tm_builder; + LLVMOrcLLJITBuilderRef lljit_builder; + LLVMErrorRef error; + LLVMOrcDefinitionGeneratorRef main_gen; + LLVMOrcDefinitionGeneratorRef ref_gen; + + lljit_builder = LLVMOrcCreateLLJITBuilder(); + tm_builder = LLVMOrcJITTargetMachineBuilderCreateFromTargetMachine(tm); + LLVMOrcLLJITBuilderSetJITTargetMachineBuilder(lljit_builder, tm_builder); + + LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator(lljit_builder, + llvm_create_object_layer, + NULL); + + error = LLVMOrcCreateLLJIT(&lljit, lljit_builder); + if (error) + elog(ERROR, "failed to create lljit instance: %s", + llvm_error_message(error)); + + LLVMOrcExecutionSessionSetErrorReporter(LLVMOrcLLJITGetExecutionSession(lljit), + llvm_log_jit_error, NULL); + + /* + * Symbol resolution support for symbols in the postgres binary / + * libraries already loaded. + */ + error = LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(&main_gen, + LLVMOrcLLJITGetGlobalPrefix(lljit), + 0, NULL); + if (error) + elog(ERROR, "failed to create generator: %s", + llvm_error_message(error)); + LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(lljit), main_gen); + + /* + * Symbol resolution support for "special" functions, e.g. a call into an + * SQL callable function. 
+ */ + ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL); + LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(lljit), ref_gen); + + return lljit; +} + +static char * +llvm_error_message(LLVMErrorRef error) +{ + char *orig = LLVMGetErrorMessage(error); + char *msg = pstrdup(orig); + + LLVMDisposeErrorMessage(orig); + + return msg; +} + +#endif /* LLVM_VERSION_MAJOR > 11 */ diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c new file mode 100644 index 0000000..008cd61 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_deform.c @@ -0,0 +1,756 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_deform.c + * Generate code for deforming a heap tuple. + * + * This gains performance benefits over unJITed deforming from compile-time + * knowledge of the tuple descriptor. Fixed column widths, NOT NULLness, etc + * can be taken advantage of. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_deform.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <llvm-c/Core.h> + +#include "access/htup_details.h" +#include "access/tupdesc_details.h" +#include "executor/tuptable.h" +#include "jit/llvmjit.h" +#include "jit/llvmjit_emit.h" + + +/* + * Create a function that deforms a tuple of type desc up to natts columns. 
+ */ +LLVMValueRef +slot_compile_deform(LLVMJitContext *context, TupleDesc desc, + const TupleTableSlotOps *ops, int natts) +{ + char *funcname; + + LLVMModuleRef mod; + LLVMBuilderRef b; + + LLVMTypeRef deform_sig; + LLVMValueRef v_deform_fn; + + LLVMBasicBlockRef b_entry; + LLVMBasicBlockRef b_adjust_unavail_cols; + LLVMBasicBlockRef b_find_start; + + LLVMBasicBlockRef b_out; + LLVMBasicBlockRef b_dead; + LLVMBasicBlockRef *attcheckattnoblocks; + LLVMBasicBlockRef *attstartblocks; + LLVMBasicBlockRef *attisnullblocks; + LLVMBasicBlockRef *attcheckalignblocks; + LLVMBasicBlockRef *attalignblocks; + LLVMBasicBlockRef *attstoreblocks; + + LLVMValueRef v_offp; + + LLVMValueRef v_tupdata_base; + LLVMValueRef v_tts_values; + LLVMValueRef v_tts_nulls; + LLVMValueRef v_slotoffp; + LLVMValueRef v_flagsp; + LLVMValueRef v_nvalidp; + LLVMValueRef v_nvalid; + LLVMValueRef v_maxatt; + + LLVMValueRef v_slot; + + LLVMValueRef v_tupleheaderp; + LLVMValueRef v_tuplep; + LLVMValueRef v_infomask1; + LLVMValueRef v_infomask2; + LLVMValueRef v_bits; + + LLVMValueRef v_hoff; + + LLVMValueRef v_hasnulls; + + /* last column (0 indexed) guaranteed to exist */ + int guaranteed_column_number = -1; + + /* current known alignment */ + int known_alignment = 0; + + /* if true, known_alignment describes definite offset of column */ + bool attguaranteedalign = true; + + int attnum; + + /* virtual tuples never need deforming, so don't generate code */ + if (ops == &TTSOpsVirtual) + return NULL; + + /* decline to JIT for slot types we don't know to handle */ + if (ops != &TTSOpsHeapTuple && ops != &TTSOpsBufferHeapTuple && + ops != &TTSOpsMinimalTuple) + return NULL; + + mod = llvm_mutable_module(context); + + funcname = llvm_expand_funcname(context, "deform"); + + /* + * Check which columns have to exist, so we don't have to check the row's + * natts unnecessarily. 
+ */ + for (attnum = 0; attnum < desc->natts; attnum++) + { + Form_pg_attribute att = TupleDescAttr(desc, attnum); + + /* + * If the column is declared NOT NULL then it must be present in every + * tuple, unless there's a "missing" entry that could provide a + * non-NULL value for it. That in turn guarantees that the NULL bitmap + * - if there are any NULLable columns - is at least long enough to + * cover columns up to attnum. + * + * Be paranoid and also check !attisdropped, even though the + * combination of attisdropped && attnotnull combination shouldn't + * exist. + */ + if (att->attnotnull && + !att->atthasmissing && + !att->attisdropped) + guaranteed_column_number = attnum; + } + + /* Create the signature and function */ + { + LLVMTypeRef param_types[1]; + + param_types[0] = l_ptr(StructTupleTableSlot); + + deform_sig = LLVMFunctionType(LLVMVoidType(), param_types, + lengthof(param_types), 0); + } + v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig); + LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage); + LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF); + llvm_copy_attributes(AttributeTemplate, v_deform_fn); + + b_entry = + LLVMAppendBasicBlock(v_deform_fn, "entry"); + b_adjust_unavail_cols = + LLVMAppendBasicBlock(v_deform_fn, "adjust_unavail_cols"); + b_find_start = + LLVMAppendBasicBlock(v_deform_fn, "find_startblock"); + b_out = + LLVMAppendBasicBlock(v_deform_fn, "outblock"); + b_dead = + LLVMAppendBasicBlock(v_deform_fn, "deadblock"); + + b = LLVMCreateBuilder(); + + attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + + known_alignment = 0; + + LLVMPositionBuilderAtEnd(b, b_entry); + + /* perform 
allocas first, llvm only converts those to registers */ + v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp"); + + v_slot = LLVMGetParam(v_deform_fn, 0); + + v_tts_values = + l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_VALUES, + "tts_values"); + v_tts_nulls = + l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL, + "tts_ISNULL"); + v_flagsp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_FLAGS, ""); + v_nvalidp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, ""); + + if (ops == &TTSOpsHeapTuple || ops == &TTSOpsBufferHeapTuple) + { + LLVMValueRef v_heapslot; + + v_heapslot = + LLVMBuildBitCast(b, + v_slot, + l_ptr(StructHeapTupleTableSlot), + "heapslot"); + v_slotoffp = LLVMBuildStructGEP(b, v_heapslot, FIELDNO_HEAPTUPLETABLESLOT_OFF, ""); + v_tupleheaderp = + l_load_struct_gep(b, v_heapslot, FIELDNO_HEAPTUPLETABLESLOT_TUPLE, + "tupleheader"); + + } + else if (ops == &TTSOpsMinimalTuple) + { + LLVMValueRef v_minimalslot; + + v_minimalslot = + LLVMBuildBitCast(b, + v_slot, + l_ptr(StructMinimalTupleTableSlot), + "minimalslot"); + v_slotoffp = LLVMBuildStructGEP(b, v_minimalslot, FIELDNO_MINIMALTUPLETABLESLOT_OFF, ""); + v_tupleheaderp = + l_load_struct_gep(b, v_minimalslot, FIELDNO_MINIMALTUPLETABLESLOT_TUPLE, + "tupleheader"); + } + else + { + /* should've returned at the start of the function */ + pg_unreachable(); + } + + v_tuplep = + l_load_struct_gep(b, v_tupleheaderp, FIELDNO_HEAPTUPLEDATA_DATA, + "tuple"); + v_bits = + LLVMBuildBitCast(b, + LLVMBuildStructGEP(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_BITS, + ""), + l_ptr(LLVMInt8Type()), + "t_bits"); + v_infomask1 = + l_load_struct_gep(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK, + "infomask1"); + v_infomask2 = + l_load_struct_gep(b, + v_tuplep, FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2, + "infomask2"); + + /* t_infomask & HEAP_HASNULL */ + v_hasnulls = + LLVMBuildICmp(b, LLVMIntNE, + LLVMBuildAnd(b, + l_int16_const(HEAP_HASNULL), + v_infomask1, ""), + l_int16_const(0), 
+ "hasnulls"); + + /* t_infomask2 & HEAP_NATTS_MASK */ + v_maxatt = LLVMBuildAnd(b, + l_int16_const(HEAP_NATTS_MASK), + v_infomask2, + "maxatt"); + + /* + * Need to zext, as getelementptr otherwise treats hoff as a signed 8bit + * integer, which'd yield a negative offset for t_hoff > 127. + */ + v_hoff = + LLVMBuildZExt(b, + l_load_struct_gep(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_HOFF, + ""), + LLVMInt32Type(), "t_hoff"); + + v_tupdata_base = + LLVMBuildGEP(b, + LLVMBuildBitCast(b, + v_tuplep, + l_ptr(LLVMInt8Type()), + ""), + &v_hoff, 1, + "v_tupdata_base"); + + /* + * Load tuple start offset from slot. Will be reset below in case there's + * no existing deformed columns in slot. + */ + { + LLVMValueRef v_off_start; + + v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off"); + v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, ""); + LLVMBuildStore(b, v_off_start, v_offp); + } + + /* build the basic block for each attribute, need them as jump target */ + for (attnum = 0; attnum < natts; attnum++) + { + attcheckattnoblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum); + attstartblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum); + attisnullblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum); + attcheckalignblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum); + attalignblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum); + attstoreblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum); + } + + /* + * Check if it is guaranteed that all the desired attributes are available + * in the tuple (but still possibly NULL), by dint of either the last + * to-be-deformed column being NOT NULL, or subsequent ones not accessed + * here being NOT NULL. 
If that's not guaranteed the tuple headers natt's + * has to be checked, and missing attributes potentially have to be + * fetched (using slot_getmissingattrs(). + */ + if ((natts - 1) <= guaranteed_column_number) + { + /* just skip through unnecessary blocks */ + LLVMBuildBr(b, b_adjust_unavail_cols); + LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols); + LLVMBuildBr(b, b_find_start); + } + else + { + LLVMValueRef v_params[3]; + + /* branch if not all columns available */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntULT, + v_maxatt, + l_int16_const(natts), + ""), + b_adjust_unavail_cols, + b_find_start); + + /* if not, memset tts_isnull of relevant cols to true */ + LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols); + + v_params[0] = v_slot; + v_params[1] = LLVMBuildZExt(b, v_maxatt, LLVMInt32Type(), ""); + v_params[2] = l_int32_const(natts); + LLVMBuildCall(b, llvm_pg_func(mod, "slot_getmissingattrs"), + v_params, lengthof(v_params), ""); + LLVMBuildBr(b, b_find_start); + } + + LLVMPositionBuilderAtEnd(b, b_find_start); + + v_nvalid = LLVMBuildLoad(b, v_nvalidp, ""); + + /* + * Build switch to go from nvalid to the right startblock. Callers + * currently don't have the knowledge, but it'd be good for performance to + * avoid this check when it's known that the slot is empty (e.g. in scan + * nodes). + */ + if (true) + { + LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid, + b_dead, natts); + + for (attnum = 0; attnum < natts; attnum++) + { + LLVMValueRef v_attno = l_int16_const(attnum); + + LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]); + } + + } + else + { + /* jump from entry block to first block */ + LLVMBuildBr(b, attcheckattnoblocks[0]); + } + + LLVMPositionBuilderAtEnd(b, b_dead); + LLVMBuildUnreachable(b); + + /* + * Iterate over each attribute that needs to be deformed, build code to + * deform it. 
+ */ + for (attnum = 0; attnum < natts; attnum++) + { + Form_pg_attribute att = TupleDescAttr(desc, attnum); + LLVMValueRef v_incby; + int alignto; + LLVMValueRef l_attno = l_int16_const(attnum); + LLVMValueRef v_attdatap; + LLVMValueRef v_resultp; + + /* build block checking whether we did all the necessary attributes */ + LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); + + /* + * If this is the first attribute, slot->tts_nvalid was 0. Therefore + * also reset offset to 0, it may be from a previous execution. + */ + if (attnum == 0) + { + LLVMBuildStore(b, l_sizet_const(0), v_offp); + } + + /* + * Build check whether column is available (i.e. whether the tuple has + * that many columns stored). We can avoid the branch if we know + * there's a subsequent NOT NULL column. + */ + if (attnum <= guaranteed_column_number) + { + LLVMBuildBr(b, attstartblocks[attnum]); + } + else + { + LLVMValueRef v_islast; + + v_islast = LLVMBuildICmp(b, LLVMIntUGE, + l_attno, + v_maxatt, + "heap_natts"); + LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]); + + /* + * Check for nulls if necessary. No need to take missing attributes + * into account, because if they're present the heaptuple's natts + * would have indicated that a slot_getmissingattrs() is needed. 
+ */ + if (!att->attnotnull) + { + LLVMBasicBlockRef b_ifnotnull; + LLVMBasicBlockRef b_ifnull; + LLVMBasicBlockRef b_next; + LLVMValueRef v_attisnull; + LLVMValueRef v_nullbyteno; + LLVMValueRef v_nullbytemask; + LLVMValueRef v_nullbyte; + LLVMValueRef v_nullbit; + + b_ifnotnull = attcheckalignblocks[attnum]; + b_ifnull = attisnullblocks[attnum]; + + if (attnum + 1 == natts) + b_next = b_out; + else + b_next = attcheckattnoblocks[attnum + 1]; + + v_nullbyteno = l_int32_const(attnum >> 3); + v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07)); + v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte"); + + v_nullbit = LLVMBuildICmp(b, + LLVMIntEQ, + LLVMBuildAnd(b, v_nullbyte, v_nullbytemask, ""), + l_int8_const(0), + "attisnull"); + + v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, ""); + + LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull); + + LLVMPositionBuilderAtEnd(b, b_ifnull); + + /* store null-byte */ + LLVMBuildStore(b, + l_int8_const(1), + LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, "")); + /* store zero datum */ + LLVMBuildStore(b, + l_sizet_const(0), + LLVMBuildGEP(b, v_tts_values, &l_attno, 1, "")); + + LLVMBuildBr(b, b_next); + attguaranteedalign = false; + } + else + { + /* nothing to do */ + LLVMBuildBr(b, attcheckalignblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]); + LLVMBuildBr(b, attcheckalignblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); + + /* determine required alignment */ + if (att->attalign == TYPALIGN_INT) + alignto = ALIGNOF_INT; + else if (att->attalign == TYPALIGN_CHAR) + alignto = 1; + else if (att->attalign == TYPALIGN_DOUBLE) + alignto = ALIGNOF_DOUBLE; + else if (att->attalign == TYPALIGN_SHORT) + alignto = ALIGNOF_SHORT; + else + { + elog(ERROR, "unknown alignment"); + alignto = 0; + } + + /* ------ + * Even if alignment is required, we can skip doing it if provably + * unnecessary: + * - first column is guaranteed to be aligned + * - columns following a 
NOT NULL fixed width datum have known + * alignment, can skip alignment computation if that known alignment + * is compatible with current column. + * ------ + */ + if (alignto > 1 && + (known_alignment < 0 || known_alignment != TYPEALIGN(alignto, known_alignment))) + { + /* + * When accessing a varlena field, we have to "peek" to see if we + * are looking at a pad byte or the first byte of a 1-byte-header + * datum. A zero byte must be either a pad byte, or the first + * byte of a correctly aligned 4-byte length word; in either case, + * we can align safely. A non-zero byte must be either a 1-byte + * length word, or the first byte of a correctly aligned 4-byte + * length word; in either case, we need not align. + */ + if (att->attlen == -1) + { + LLVMValueRef v_possible_padbyte; + LLVMValueRef v_ispad; + LLVMValueRef v_off; + + /* don't know if short varlena or not */ + attguaranteedalign = false; + + v_off = LLVMBuildLoad(b, v_offp, ""); + + v_possible_padbyte = + l_load_gep1(b, v_tupdata_base, v_off, "padbyte"); + v_ispad = + LLVMBuildICmp(b, LLVMIntEQ, + v_possible_padbyte, l_int8_const(0), + "ispadbyte"); + LLVMBuildCondBr(b, v_ispad, + attalignblocks[attnum], + attstoreblocks[attnum]); + } + else + { + LLVMBuildBr(b, attalignblocks[attnum]); + } + + LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]); + + /* translation of alignment code (cf TYPEALIGN()) */ + { + LLVMValueRef v_off_aligned; + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + /* ((ALIGNVAL) - 1) */ + LLVMValueRef v_alignval = l_sizet_const(alignto - 1); + + /* ((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) */ + LLVMValueRef v_lh = LLVMBuildAdd(b, v_off, v_alignval, ""); + + /* ~((uintptr_t) ((ALIGNVAL) - 1)) */ + LLVMValueRef v_rh = l_sizet_const(~(alignto - 1)); + + v_off_aligned = LLVMBuildAnd(b, v_lh, v_rh, "aligned_offset"); + + LLVMBuildStore(b, v_off_aligned, v_offp); + } + + /* + * As alignment either was unnecessary or has been performed, we + * now know the current alignment. 
This is only safe because this + * value isn't used for varlena and nullable columns. + */ + if (known_alignment >= 0) + { + Assert(known_alignment != 0); + known_alignment = TYPEALIGN(alignto, known_alignment); + } + + LLVMBuildBr(b, attstoreblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); + } + else + { + LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); + LLVMBuildBr(b, attalignblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); + + /* + * Store the current offset if known to be constant. That allows LLVM + * to generate better code. Without that LLVM can't figure out that + * the offset might be constant due to the jumps for previously + * decoded columns. + */ + if (attguaranteedalign) + { + Assert(known_alignment >= 0); + LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp); + } + + /* compute what following columns are aligned to */ + if (att->attlen < 0) + { + /* can't guarantee any alignment after variable length field */ + known_alignment = -1; + attguaranteedalign = false; + } + else if (att->attnotnull && attguaranteedalign && known_alignment >= 0) + { + /* + * If the offset to the column was previously known, a NOT NULL & + * fixed-width column guarantees that alignment is just the + * previous alignment plus column width. + */ + Assert(att->attlen > 0); + known_alignment += att->attlen; + } + else if (att->attnotnull && (att->attlen % alignto) == 0) + { + /* + * After a NOT NULL fixed-width column with a length that is a + * multiple of its alignment requirement, we know the following + * column is aligned to at least the current column's alignment. 
+ */ + Assert(att->attlen > 0); + known_alignment = alignto; + Assert(known_alignment > 0); + attguaranteedalign = false; + } + else + { + known_alignment = -1; + attguaranteedalign = false; + } + + + /* compute address to load data from */ + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + v_attdatap = + LLVMBuildGEP(b, v_tupdata_base, &v_off, 1, ""); + } + + /* compute address to store value at */ + v_resultp = LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""); + + /* store null-byte (false) */ + LLVMBuildStore(b, l_int8_const(0), + LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, "")); + + /* + * Store datum. For byval: datums copy the value, extend to Datum's + * width, and store. For byref types: store pointer to data. + */ + if (att->attbyval) + { + LLVMValueRef v_tmp_loaddata; + LLVMTypeRef vartypep = + LLVMPointerType(LLVMIntType(att->attlen * 8), 0); + + v_tmp_loaddata = + LLVMBuildPointerCast(b, v_attdatap, vartypep, ""); + v_tmp_loaddata = LLVMBuildLoad(b, v_tmp_loaddata, "attr_byval"); + v_tmp_loaddata = LLVMBuildZExt(b, v_tmp_loaddata, TypeSizeT, ""); + + LLVMBuildStore(b, v_tmp_loaddata, v_resultp); + } + else + { + LLVMValueRef v_tmp_loaddata; + + /* store pointer */ + v_tmp_loaddata = + LLVMBuildPtrToInt(b, + v_attdatap, + TypeSizeT, + "attr_ptr"); + LLVMBuildStore(b, v_tmp_loaddata, v_resultp); + } + + /* increment data pointer */ + if (att->attlen > 0) + { + v_incby = l_sizet_const(att->attlen); + } + else if (att->attlen == -1) + { + v_incby = LLVMBuildCall(b, + llvm_pg_func(mod, "varsize_any"), + &v_attdatap, 1, + "varsize_any"); + l_callsite_ro(v_incby); + l_callsite_alwaysinline(v_incby); + } + else if (att->attlen == -2) + { + v_incby = LLVMBuildCall(b, + llvm_pg_func(mod, "strlen"), + &v_attdatap, 1, "strlen"); + + l_callsite_ro(v_incby); + + /* add 1 for NUL byte */ + v_incby = LLVMBuildAdd(b, v_incby, l_sizet_const(1), ""); + } + else + { + Assert(false); + v_incby = NULL; /* silence compiler */ + } + + if (attguaranteedalign) + { + 
Assert(known_alignment >= 0); + LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp); + } + else + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + v_off = LLVMBuildAdd(b, v_off, v_incby, "increment_offset"); + LLVMBuildStore(b, v_off, v_offp); + } + + /* + * jump to next block, unless last possible column, or all desired + * (available) attributes have been fetched. + */ + if (attnum + 1 == natts) + { + /* jump out */ + LLVMBuildBr(b, b_out); + } + else + { + LLVMBuildBr(b, attcheckattnoblocks[attnum + 1]); + } + } + + + /* build block that returns */ + LLVMPositionBuilderAtEnd(b, b_out); + + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + LLVMValueRef v_flags; + + LLVMBuildStore(b, l_int16_const(natts), v_nvalidp); + v_off = LLVMBuildTrunc(b, v_off, LLVMInt32Type(), ""); + LLVMBuildStore(b, v_off, v_slotoffp); + v_flags = LLVMBuildLoad(b, v_flagsp, "tts_flags"); + v_flags = LLVMBuildOr(b, v_flags, l_int16_const(TTS_FLAG_SLOW), ""); + LLVMBuildStore(b, v_flags, v_flagsp); + LLVMBuildRetVoid(b); + } + + LLVMDisposeBuilder(b); + + return v_deform_fn; +} diff --git a/src/backend/jit/llvm/llvmjit_error.cpp b/src/backend/jit/llvm/llvmjit_error.cpp new file mode 100644 index 0000000..5ad92f3 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_error.cpp @@ -0,0 +1,176 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_error.cpp + * LLVM error related handling that requires interfacing with C++ + * + * Unfortunately neither (re)setting the C++ new handler, nor the LLVM OOM + * handler are exposed to C. Therefore this file wraps the necessary code. 
+ * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_error.cpp + * + *------------------------------------------------------------------------- + */ + +extern "C" +{ +#include "postgres.h" +} + +#include <llvm/Support/ErrorHandling.h> + +#include "jit/llvmjit.h" + +#include <new> + +static int fatal_new_handler_depth = 0; +static std::new_handler old_new_handler = NULL; + +static void fatal_system_new_handler(void); +#if LLVM_VERSION_MAJOR > 4 +static void fatal_llvm_new_handler(void *user_data, const char *reason, bool gen_crash_diag); +#if LLVM_VERSION_MAJOR < 14 +static void fatal_llvm_new_handler(void *user_data, const std::string& reason, bool gen_crash_diag); +#endif +#endif +static void fatal_llvm_error_handler(void *user_data, const char *reason, bool gen_crash_diag); +#if LLVM_VERSION_MAJOR < 14 +static void fatal_llvm_error_handler(void *user_data, const std::string& reason, bool gen_crash_diag); +#endif + + +/* + * Enter a section in which C++ and LLVM errors are treated as FATAL errors. + * + * This is necessary for LLVM as LLVM's error handling for such cases + * (exit()ing, throwing std::bad_alloc() if compiled with exceptions, abort()) + * isn't compatible with postgres error handling. Thus in sections where LLVM + * code, not LLVM generated functions!, is executing, standard new, LLVM OOM + * and LLVM fatal errors (some OOM errors masquerade as those) are redirected + * to our own error handlers. + * + * These error handlers use FATAL, because there's no reliable way from within + * LLVM to throw an error that's guaranteed not to corrupt LLVM's state. + * + * To avoid disturbing extensions using C++ and/or LLVM, these handlers are + * unset when not executing LLVM code. There is no need to call + * llvm_leave_fatal_on_oom() when ERRORing out, error recovery resets the + * handlers in that case. 
+ */ +void +llvm_enter_fatal_on_oom(void) +{ + if (fatal_new_handler_depth == 0) + { + old_new_handler = std::set_new_handler(fatal_system_new_handler); +#if LLVM_VERSION_MAJOR > 4 + llvm::install_bad_alloc_error_handler(fatal_llvm_new_handler); +#endif + llvm::install_fatal_error_handler(fatal_llvm_error_handler); + } + fatal_new_handler_depth++; +} + +/* + * Leave fatal error section started with llvm_enter_fatal_on_oom(). + */ +void +llvm_leave_fatal_on_oom(void) +{ + fatal_new_handler_depth--; + if (fatal_new_handler_depth == 0) + { + std::set_new_handler(old_new_handler); +#if LLVM_VERSION_MAJOR > 4 + llvm::remove_bad_alloc_error_handler(); +#endif + llvm::remove_fatal_error_handler(); + } +} + +/* + * Are we currently in a fatal-on-oom section? Useful to skip cleanup in case + * of errors. + */ +bool +llvm_in_fatal_on_oom(void) +{ + return fatal_new_handler_depth > 0; +} + +/* + * Reset fatal error handling. This should only be called in error recovery + * loops like PostgresMain()'s. 
+ */ +void +llvm_reset_after_error(void) +{ + if (fatal_new_handler_depth != 0) + { + std::set_new_handler(old_new_handler); +#if LLVM_VERSION_MAJOR > 4 + llvm::remove_bad_alloc_error_handler(); +#endif + llvm::remove_fatal_error_handler(); + } + fatal_new_handler_depth = 0; +} + +void +llvm_assert_in_fatal_section(void) +{ + Assert(fatal_new_handler_depth > 0); +} + +static void +fatal_system_new_handler(void) +{ + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("while in LLVM"))); +} + +#if LLVM_VERSION_MAJOR > 4 +static void +fatal_llvm_new_handler(void *user_data, + const char *reason, + bool gen_crash_diag) +{ + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("While in LLVM: %s", reason))); +} +#if LLVM_VERSION_MAJOR < 14 +static void +fatal_llvm_new_handler(void *user_data, + const std::string& reason, + bool gen_crash_diag) +{ + fatal_llvm_new_handler(user_data, reason.c_str(), gen_crash_diag); +} +#endif +#endif + +static void +fatal_llvm_error_handler(void *user_data, + const char *reason, + bool gen_crash_diag) +{ + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("fatal llvm error: %s", reason))); +} + +#if LLVM_VERSION_MAJOR < 14 +static void +fatal_llvm_error_handler(void *user_data, + const std::string& reason, + bool gen_crash_diag) +{ + fatal_llvm_error_handler(user_data, reason.c_str(), gen_crash_diag); +} +#endif diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c new file mode 100644 index 0000000..8a4075b --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -0,0 +1,2520 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_expr.c + * JIT compile expressions. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_expr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <llvm-c/Core.h> +#include <llvm-c/Target.h> + +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_type.h" +#include "executor/execExpr.h" +#include "executor/execdebug.h" +#include "executor/nodeAgg.h" +#include "executor/nodeSubplan.h" +#include "funcapi.h" +#include "jit/llvmjit.h" +#include "jit/llvmjit_emit.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_coerce.h" +#include "parser/parsetree.h" +#include "pgstat.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/fmgrtab.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" +#include "utils/typcache.h" +#include "utils/xml.h" + +typedef struct CompiledExprState +{ + LLVMJitContext *context; + const char *funcname; +} CompiledExprState; + + +static Datum ExecRunCompiledExpr(ExprState *state, ExprContext *econtext, bool *isNull); + +static LLVMValueRef BuildV1Call(LLVMJitContext *context, LLVMBuilderRef b, + LLVMModuleRef mod, FunctionCallInfo fcinfo, + LLVMValueRef *v_fcinfo_isnull); +static LLVMValueRef build_EvalXFuncInt(LLVMBuilderRef b, LLVMModuleRef mod, + const char *funcname, + LLVMValueRef v_state, + ExprEvalStep *op, + int natts, LLVMValueRef *v_args); +static LLVMValueRef create_LifetimeEnd(LLVMModuleRef mod); + +/* macro making it easier to call ExecEval* functions */ +#define build_EvalXFunc(b, mod, funcname, v_state, op, ...) 
\ + build_EvalXFuncInt(b, mod, funcname, v_state, op, \ + lengthof(((LLVMValueRef[]){__VA_ARGS__})), \ + ((LLVMValueRef[]){__VA_ARGS__})) + + +/* + * JIT compile expression. + */ +bool +llvm_compile_expr(ExprState *state) +{ + PlanState *parent = state->parent; + char *funcname; + + LLVMJitContext *context = NULL; + + LLVMBuilderRef b; + LLVMModuleRef mod; + LLVMValueRef eval_fn; + LLVMBasicBlockRef entry; + LLVMBasicBlockRef *opblocks; + + /* state itself */ + LLVMValueRef v_state; + LLVMValueRef v_econtext; + LLVMValueRef v_parent; + + /* returnvalue */ + LLVMValueRef v_isnullp; + + /* tmp vars in state */ + LLVMValueRef v_tmpvaluep; + LLVMValueRef v_tmpisnullp; + + /* slots */ + LLVMValueRef v_innerslot; + LLVMValueRef v_outerslot; + LLVMValueRef v_scanslot; + LLVMValueRef v_resultslot; + + /* nulls/values of slots */ + LLVMValueRef v_innervalues; + LLVMValueRef v_innernulls; + LLVMValueRef v_outervalues; + LLVMValueRef v_outernulls; + LLVMValueRef v_scanvalues; + LLVMValueRef v_scannulls; + LLVMValueRef v_resultvalues; + LLVMValueRef v_resultnulls; + + /* stuff in econtext */ + LLVMValueRef v_aggvalues; + LLVMValueRef v_aggnulls; + + instr_time starttime; + instr_time endtime; + + llvm_enter_fatal_on_oom(); + + /* + * Right now we don't support compiling expressions without a parent, as + * we need access to the EState. 
+ */ + Assert(parent); + + /* get or create JIT context */ + if (parent->state->es_jit) + context = (LLVMJitContext *) parent->state->es_jit; + else + { + context = llvm_create_context(parent->state->es_jit_flags); + parent->state->es_jit = &context->base; + } + + INSTR_TIME_SET_CURRENT(starttime); + + mod = llvm_mutable_module(context); + + b = LLVMCreateBuilder(); + + funcname = llvm_expand_funcname(context, "evalexpr"); + + /* create function */ + eval_fn = LLVMAddFunction(mod, funcname, + llvm_pg_var_func_type("TypeExprStateEvalFunc")); + LLVMSetLinkage(eval_fn, LLVMExternalLinkage); + LLVMSetVisibility(eval_fn, LLVMDefaultVisibility); + llvm_copy_attributes(AttributeTemplate, eval_fn); + + entry = LLVMAppendBasicBlock(eval_fn, "entry"); + + /* build state */ + v_state = LLVMGetParam(eval_fn, 0); + v_econtext = LLVMGetParam(eval_fn, 1); + v_isnullp = LLVMGetParam(eval_fn, 2); + + LLVMPositionBuilderAtEnd(b, entry); + + v_tmpvaluep = LLVMBuildStructGEP(b, v_state, + FIELDNO_EXPRSTATE_RESVALUE, + "v.state.resvalue"); + v_tmpisnullp = LLVMBuildStructGEP(b, v_state, + FIELDNO_EXPRSTATE_RESNULL, + "v.state.resnull"); + v_parent = l_load_struct_gep(b, v_state, + FIELDNO_EXPRSTATE_PARENT, + "v.state.parent"); + + /* build global slots */ + v_scanslot = l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_SCANTUPLE, + "v_scanslot"); + v_innerslot = l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_INNERTUPLE, + "v_innerslot"); + v_outerslot = l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_OUTERTUPLE, + "v_outerslot"); + v_resultslot = l_load_struct_gep(b, v_state, + FIELDNO_EXPRSTATE_RESULTSLOT, + "v_resultslot"); + + /* build global values/isnull pointers */ + v_scanvalues = l_load_struct_gep(b, v_scanslot, + FIELDNO_TUPLETABLESLOT_VALUES, + "v_scanvalues"); + v_scannulls = l_load_struct_gep(b, v_scanslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_scannulls"); + v_innervalues = l_load_struct_gep(b, v_innerslot, + FIELDNO_TUPLETABLESLOT_VALUES, + 
"v_innervalues"); + v_innernulls = l_load_struct_gep(b, v_innerslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_innernulls"); + v_outervalues = l_load_struct_gep(b, v_outerslot, + FIELDNO_TUPLETABLESLOT_VALUES, + "v_outervalues"); + v_outernulls = l_load_struct_gep(b, v_outerslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_outernulls"); + v_resultvalues = l_load_struct_gep(b, v_resultslot, + FIELDNO_TUPLETABLESLOT_VALUES, + "v_resultvalues"); + v_resultnulls = l_load_struct_gep(b, v_resultslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_resultnulls"); + + /* aggvalues/aggnulls */ + v_aggvalues = l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_AGGVALUES, + "v.econtext.aggvalues"); + v_aggnulls = l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_AGGNULLS, + "v.econtext.aggnulls"); + + /* allocate blocks for each op upfront, so we can do jumps easily */ + opblocks = palloc(sizeof(LLVMBasicBlockRef) * state->steps_len); + for (int opno = 0; opno < state->steps_len; opno++) + opblocks[opno] = l_bb_append_v(eval_fn, "b.op.%d.start", opno); + + /* jump from entry to first block */ + LLVMBuildBr(b, opblocks[0]); + + for (int opno = 0; opno < state->steps_len; opno++) + { + ExprEvalStep *op; + ExprEvalOp opcode; + LLVMValueRef v_resvaluep; + LLVMValueRef v_resnullp; + + LLVMPositionBuilderAtEnd(b, opblocks[opno]); + + op = &state->steps[opno]; + opcode = ExecEvalStepOp(state, op); + + v_resvaluep = l_ptr_const(op->resvalue, l_ptr(TypeSizeT)); + v_resnullp = l_ptr_const(op->resnull, l_ptr(TypeStorageBool)); + + switch (opcode) + { + case EEOP_DONE: + { + LLVMValueRef v_tmpisnull; + LLVMValueRef v_tmpvalue; + + v_tmpvalue = LLVMBuildLoad(b, v_tmpvaluep, ""); + v_tmpisnull = LLVMBuildLoad(b, v_tmpisnullp, ""); + + LLVMBuildStore(b, v_tmpisnull, v_isnullp); + + LLVMBuildRet(b, v_tmpvalue); + break; + } + + case EEOP_INNER_FETCHSOME: + case EEOP_OUTER_FETCHSOME: + case EEOP_SCAN_FETCHSOME: + { + TupleDesc desc = NULL; + LLVMValueRef v_slot; + LLVMBasicBlockRef b_fetch; + 
LLVMValueRef v_nvalid; + LLVMValueRef l_jit_deform = NULL; + const TupleTableSlotOps *tts_ops = NULL; + + b_fetch = l_bb_before_v(opblocks[opno + 1], + "op.%d.fetch", opno); + + if (op->d.fetch.known_desc) + desc = op->d.fetch.known_desc; + + if (op->d.fetch.fixed) + tts_ops = op->d.fetch.kind; + + /* step should not have been generated */ + Assert(tts_ops != &TTSOpsVirtual); + + if (opcode == EEOP_INNER_FETCHSOME) + v_slot = v_innerslot; + else if (opcode == EEOP_OUTER_FETCHSOME) + v_slot = v_outerslot; + else + v_slot = v_scanslot; + + /* + * Check if all required attributes are available, or + * whether deforming is required. + */ + v_nvalid = + l_load_struct_gep(b, v_slot, + FIELDNO_TUPLETABLESLOT_NVALID, + ""); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntUGE, v_nvalid, + l_int16_const(op->d.fetch.last_var), + ""), + opblocks[opno + 1], b_fetch); + + LLVMPositionBuilderAtEnd(b, b_fetch); + + /* + * If the tupledesc of the to-be-deformed tuple is known, + * and JITing of deforming is enabled, build deform + * function specific to tupledesc and the exact number of + * to-be-extracted attributes. 
+ */ + if (tts_ops && desc && (context->base.flags & PGJIT_DEFORM)) + { + l_jit_deform = + slot_compile_deform(context, desc, + tts_ops, + op->d.fetch.last_var); + } + + if (l_jit_deform) + { + LLVMValueRef params[1]; + + params[0] = v_slot; + + LLVMBuildCall(b, l_jit_deform, + params, lengthof(params), ""); + } + else + { + LLVMValueRef params[2]; + + params[0] = v_slot; + params[1] = l_int32_const(op->d.fetch.last_var); + + LLVMBuildCall(b, + llvm_pg_func(mod, "slot_getsomeattrs_int"), + params, lengthof(params), ""); + } + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_INNER_VAR: + case EEOP_OUTER_VAR: + case EEOP_SCAN_VAR: + { + LLVMValueRef value, + isnull; + LLVMValueRef v_attnum; + LLVMValueRef v_values; + LLVMValueRef v_nulls; + + if (opcode == EEOP_INNER_VAR) + { + v_values = v_innervalues; + v_nulls = v_innernulls; + } + else if (opcode == EEOP_OUTER_VAR) + { + v_values = v_outervalues; + v_nulls = v_outernulls; + } + else + { + v_values = v_scanvalues; + v_nulls = v_scannulls; + } + + v_attnum = l_int32_const(op->d.var.attnum); + value = l_load_gep1(b, v_values, v_attnum, ""); + isnull = l_load_gep1(b, v_nulls, v_attnum, ""); + LLVMBuildStore(b, value, v_resvaluep); + LLVMBuildStore(b, isnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_INNER_SYSVAR: + case EEOP_OUTER_SYSVAR: + case EEOP_SCAN_SYSVAR: + { + LLVMValueRef v_slot; + + if (opcode == EEOP_INNER_SYSVAR) + v_slot = v_innerslot; + else if (opcode == EEOP_OUTER_SYSVAR) + v_slot = v_outerslot; + else + v_slot = v_scanslot; + + build_EvalXFunc(b, mod, "ExecEvalSysVar", + v_state, op, v_econtext, v_slot); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_WHOLEROW: + build_EvalXFunc(b, mod, "ExecEvalWholeRowVar", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_ASSIGN_INNER_VAR: + case EEOP_ASSIGN_OUTER_VAR: + case EEOP_ASSIGN_SCAN_VAR: + { + LLVMValueRef v_value; + LLVMValueRef v_isnull; + 
LLVMValueRef v_rvaluep; + LLVMValueRef v_risnullp; + LLVMValueRef v_attnum; + LLVMValueRef v_resultnum; + LLVMValueRef v_values; + LLVMValueRef v_nulls; + + if (opcode == EEOP_ASSIGN_INNER_VAR) + { + v_values = v_innervalues; + v_nulls = v_innernulls; + } + else if (opcode == EEOP_ASSIGN_OUTER_VAR) + { + v_values = v_outervalues; + v_nulls = v_outernulls; + } + else + { + v_values = v_scanvalues; + v_nulls = v_scannulls; + } + + /* load data */ + v_attnum = l_int32_const(op->d.assign_var.attnum); + v_value = l_load_gep1(b, v_values, v_attnum, ""); + v_isnull = l_load_gep1(b, v_nulls, v_attnum, ""); + + /* compute addresses of targets */ + v_resultnum = l_int32_const(op->d.assign_var.resultnum); + v_rvaluep = LLVMBuildGEP(b, v_resultvalues, + &v_resultnum, 1, ""); + v_risnullp = LLVMBuildGEP(b, v_resultnulls, + &v_resultnum, 1, ""); + + /* and store */ + LLVMBuildStore(b, v_value, v_rvaluep); + LLVMBuildStore(b, v_isnull, v_risnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_ASSIGN_TMP: + case EEOP_ASSIGN_TMP_MAKE_RO: + { + LLVMValueRef v_value, + v_isnull; + LLVMValueRef v_rvaluep, + v_risnullp; + LLVMValueRef v_resultnum; + size_t resultnum = op->d.assign_tmp.resultnum; + + /* load data */ + v_value = LLVMBuildLoad(b, v_tmpvaluep, ""); + v_isnull = LLVMBuildLoad(b, v_tmpisnullp, ""); + + /* compute addresses of targets */ + v_resultnum = l_int32_const(resultnum); + v_rvaluep = + LLVMBuildGEP(b, v_resultvalues, &v_resultnum, 1, ""); + v_risnullp = + LLVMBuildGEP(b, v_resultnulls, &v_resultnum, 1, ""); + + /* store nullness */ + LLVMBuildStore(b, v_isnull, v_risnullp); + + /* make value readonly if necessary */ + if (opcode == EEOP_ASSIGN_TMP_MAKE_RO) + { + LLVMBasicBlockRef b_notnull; + LLVMValueRef v_params[1]; + + b_notnull = l_bb_before_v(opblocks[opno + 1], + "op.%d.assign_tmp.notnull", opno); + + /* check if value is NULL */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_isnull, + l_sbool_const(0), ""), + b_notnull, 
opblocks[opno + 1]); + + /* if value is not null, convert to RO datum */ + LLVMPositionBuilderAtEnd(b, b_notnull); + v_params[0] = v_value; + v_value = + LLVMBuildCall(b, + llvm_pg_func(mod, "MakeExpandedObjectReadOnlyInternal"), + v_params, lengthof(v_params), ""); + + /* + * Falling out of the if () with builder in b_notnull, + * which is fine - the null is already stored above. + */ + } + + /* and finally store result */ + LLVMBuildStore(b, v_value, v_rvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_CONST: + { + LLVMValueRef v_constvalue, + v_constnull; + + v_constvalue = l_sizet_const(op->d.constval.value); + v_constnull = l_sbool_const(op->d.constval.isnull); + + LLVMBuildStore(b, v_constvalue, v_resvaluep); + LLVMBuildStore(b, v_constnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_FUNCEXPR: + case EEOP_FUNCEXPR_STRICT: + { + FunctionCallInfo fcinfo = op->d.func.fcinfo_data; + LLVMValueRef v_fcinfo_isnull; + LLVMValueRef v_retval; + + if (opcode == EEOP_FUNCEXPR_STRICT) + { + LLVMBasicBlockRef b_nonull; + LLVMBasicBlockRef *b_checkargnulls; + LLVMValueRef v_fcinfo; + + /* + * Block for the actual function call, if args are + * non-NULL. 
+ */ + b_nonull = l_bb_before_v(opblocks[opno + 1], + "b.%d.no-null-args", opno); + + /* should make sure they're optimized beforehand */ + if (op->d.func.nargs == 0) + elog(ERROR, "argumentless strict functions are pointless"); + + v_fcinfo = + l_ptr_const(fcinfo, l_ptr(StructFunctionCallInfoData)); + + /* + * set resnull to true, if the function is actually + * called, it'll be reset + */ + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + + /* create blocks for checking args, one for each */ + b_checkargnulls = + palloc(sizeof(LLVMBasicBlockRef *) * op->d.func.nargs); + for (int argno = 0; argno < op->d.func.nargs; argno++) + b_checkargnulls[argno] = + l_bb_before_v(b_nonull, "b.%d.isnull.%d", opno, + argno); + + /* jump to check of first argument */ + LLVMBuildBr(b, b_checkargnulls[0]); + + /* check each arg for NULLness */ + for (int argno = 0; argno < op->d.func.nargs; argno++) + { + LLVMValueRef v_argisnull; + LLVMBasicBlockRef b_argnotnull; + + LLVMPositionBuilderAtEnd(b, b_checkargnulls[argno]); + + /* + * Compute block to jump to if argument is not + * null. 
+ */ + if (argno + 1 == op->d.func.nargs) + b_argnotnull = b_nonull; + else + b_argnotnull = b_checkargnulls[argno + 1]; + + /* and finally load & check NULLness of arg */ + v_argisnull = l_funcnull(b, v_fcinfo, argno); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_argisnull, + l_sbool_const(1), + ""), + opblocks[opno + 1], + b_argnotnull); + } + + LLVMPositionBuilderAtEnd(b, b_nonull); + } + + v_retval = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + LLVMBuildStore(b, v_retval, v_resvaluep); + LLVMBuildStore(b, v_fcinfo_isnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_FUNCEXPR_FUSAGE: + build_EvalXFunc(b, mod, "ExecEvalFuncExprFusage", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + + case EEOP_FUNCEXPR_STRICT_FUSAGE: + build_EvalXFunc(b, mod, "ExecEvalFuncExprStrictFusage", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + /* + * Treat them the same for now, optimizer can remove + * redundancy. Could be worthwhile to optimize during emission + * though. 
+ */ + case EEOP_BOOL_AND_STEP_FIRST: + case EEOP_BOOL_AND_STEP: + case EEOP_BOOL_AND_STEP_LAST: + { + LLVMValueRef v_boolvalue; + LLVMValueRef v_boolnull; + LLVMValueRef v_boolanynullp, + v_boolanynull; + LLVMBasicBlockRef b_boolisnull; + LLVMBasicBlockRef b_boolcheckfalse; + LLVMBasicBlockRef b_boolisfalse; + LLVMBasicBlockRef b_boolcont; + LLVMBasicBlockRef b_boolisanynull; + + b_boolisnull = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolisnull", opno); + b_boolcheckfalse = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolcheckfalse", opno); + b_boolisfalse = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolisfalse", opno); + b_boolisanynull = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolisanynull", opno); + b_boolcont = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolcont", opno); + + v_boolanynullp = l_ptr_const(op->d.boolexpr.anynull, + l_ptr(TypeStorageBool)); + + if (opcode == EEOP_BOOL_AND_STEP_FIRST) + LLVMBuildStore(b, l_sbool_const(0), v_boolanynullp); + + v_boolnull = LLVMBuildLoad(b, v_resnullp, ""); + v_boolvalue = LLVMBuildLoad(b, v_resvaluep, ""); + + /* set resnull to boolnull */ + LLVMBuildStore(b, v_boolnull, v_resnullp); + /* set revalue to boolvalue */ + LLVMBuildStore(b, v_boolvalue, v_resvaluep); + + /* check if current input is NULL */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolnull, + l_sbool_const(1), ""), + b_boolisnull, + b_boolcheckfalse); + + /* build block that sets anynull */ + LLVMPositionBuilderAtEnd(b, b_boolisnull); + /* set boolanynull to true */ + LLVMBuildStore(b, l_sbool_const(1), v_boolanynullp); + /* and jump to next block */ + LLVMBuildBr(b, b_boolcont); + + /* build block checking for false */ + LLVMPositionBuilderAtEnd(b, b_boolcheckfalse); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolvalue, + l_sizet_const(0), ""), + b_boolisfalse, + b_boolcont); + + /* + * Build block handling FALSE. Value is false, so short + * circuit. 
+ */ + LLVMPositionBuilderAtEnd(b, b_boolisfalse); + /* result is already set to FALSE, need not change it */ + /* and jump to the end of the AND expression */ + LLVMBuildBr(b, opblocks[op->d.boolexpr.jumpdone]); + + /* Build block that continues if bool is TRUE. */ + LLVMPositionBuilderAtEnd(b, b_boolcont); + + v_boolanynull = LLVMBuildLoad(b, v_boolanynullp, ""); + + /* set value to NULL if any previous values were NULL */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolanynull, + l_sbool_const(0), ""), + opblocks[opno + 1], b_boolisanynull); + + LLVMPositionBuilderAtEnd(b, b_boolisanynull); + /* set resnull to true */ + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + /* reset resvalue */ + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + /* + * Treat them the same for now, optimizer can remove + * redundancy. Could be worthwhile to optimize during emission + * though. + */ + case EEOP_BOOL_OR_STEP_FIRST: + case EEOP_BOOL_OR_STEP: + case EEOP_BOOL_OR_STEP_LAST: + { + LLVMValueRef v_boolvalue; + LLVMValueRef v_boolnull; + LLVMValueRef v_boolanynullp, + v_boolanynull; + + LLVMBasicBlockRef b_boolisnull; + LLVMBasicBlockRef b_boolchecktrue; + LLVMBasicBlockRef b_boolistrue; + LLVMBasicBlockRef b_boolcont; + LLVMBasicBlockRef b_boolisanynull; + + b_boolisnull = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolisnull", opno); + b_boolchecktrue = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolchecktrue", opno); + b_boolistrue = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolistrue", opno); + b_boolisanynull = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolisanynull", opno); + b_boolcont = l_bb_before_v(opblocks[opno + 1], + "b.%d.boolcont", opno); + + v_boolanynullp = l_ptr_const(op->d.boolexpr.anynull, + l_ptr(TypeStorageBool)); + + if (opcode == EEOP_BOOL_OR_STEP_FIRST) + LLVMBuildStore(b, l_sbool_const(0), v_boolanynullp); + v_boolnull = LLVMBuildLoad(b, v_resnullp, ""); + v_boolvalue = LLVMBuildLoad(b, 
v_resvaluep, ""); + + /* set resnull to boolnull */ + LLVMBuildStore(b, v_boolnull, v_resnullp); + /* set revalue to boolvalue */ + LLVMBuildStore(b, v_boolvalue, v_resvaluep); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolnull, + l_sbool_const(1), ""), + b_boolisnull, + b_boolchecktrue); + + /* build block that sets anynull */ + LLVMPositionBuilderAtEnd(b, b_boolisnull); + /* set boolanynull to true */ + LLVMBuildStore(b, l_sbool_const(1), v_boolanynullp); + /* and jump to next block */ + LLVMBuildBr(b, b_boolcont); + + /* build block checking for true */ + LLVMPositionBuilderAtEnd(b, b_boolchecktrue); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolvalue, + l_sizet_const(1), ""), + b_boolistrue, + b_boolcont); + + /* + * Build block handling True. Value is true, so short + * circuit. + */ + LLVMPositionBuilderAtEnd(b, b_boolistrue); + /* result is already set to TRUE, need not change it */ + /* and jump to the end of the OR expression */ + LLVMBuildBr(b, opblocks[op->d.boolexpr.jumpdone]); + + /* build block that continues if bool is FALSE */ + LLVMPositionBuilderAtEnd(b, b_boolcont); + + v_boolanynull = LLVMBuildLoad(b, v_boolanynullp, ""); + + /* set value to NULL if any previous values were NULL */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_boolanynull, + l_sbool_const(0), ""), + opblocks[opno + 1], b_boolisanynull); + + LLVMPositionBuilderAtEnd(b, b_boolisanynull); + /* set resnull to true */ + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + /* reset resvalue */ + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_BOOL_NOT_STEP: + { + LLVMValueRef v_boolvalue; + LLVMValueRef v_boolnull; + LLVMValueRef v_negbool; + + v_boolnull = LLVMBuildLoad(b, v_resnullp, ""); + v_boolvalue = LLVMBuildLoad(b, v_resvaluep, ""); + + v_negbool = LLVMBuildZExt(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_boolvalue, + l_sizet_const(0), + ""), + TypeSizeT, ""); + /* set resnull to 
boolnull */ + LLVMBuildStore(b, v_boolnull, v_resnullp); + /* set revalue to !boolvalue */ + LLVMBuildStore(b, v_negbool, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_QUAL: + { + LLVMValueRef v_resnull; + LLVMValueRef v_resvalue; + LLVMValueRef v_nullorfalse; + LLVMBasicBlockRef b_qualfail; + + b_qualfail = l_bb_before_v(opblocks[opno + 1], + "op.%d.qualfail", opno); + + v_resvalue = LLVMBuildLoad(b, v_resvaluep, ""); + v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + + v_nullorfalse = + LLVMBuildOr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + LLVMBuildICmp(b, LLVMIntEQ, v_resvalue, + l_sizet_const(0), ""), + ""); + + LLVMBuildCondBr(b, + v_nullorfalse, + b_qualfail, + opblocks[opno + 1]); + + /* build block handling NULL or false */ + LLVMPositionBuilderAtEnd(b, b_qualfail); + /* set resnull to false */ + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + /* set resvalue to false */ + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + /* and jump out */ + LLVMBuildBr(b, opblocks[op->d.qualexpr.jumpdone]); + break; + } + + case EEOP_JUMP: + { + LLVMBuildBr(b, opblocks[op->d.jump.jumpdone]); + break; + } + + case EEOP_JUMP_IF_NULL: + { + LLVMValueRef v_resnull; + + /* Transfer control if current result is null */ + + v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + opblocks[op->d.jump.jumpdone], + opblocks[opno + 1]); + break; + } + + case EEOP_JUMP_IF_NOT_NULL: + { + LLVMValueRef v_resnull; + + /* Transfer control if current result is non-null */ + + v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(0), ""), + opblocks[op->d.jump.jumpdone], + opblocks[opno + 1]); + break; + } + + + case EEOP_JUMP_IF_NOT_TRUE: + { + LLVMValueRef v_resnull; + LLVMValueRef v_resvalue; + LLVMValueRef v_nullorfalse; + + /* Transfer control if current result is null 
or false */ + + v_resvalue = LLVMBuildLoad(b, v_resvaluep, ""); + v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + + v_nullorfalse = + LLVMBuildOr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + LLVMBuildICmp(b, LLVMIntEQ, v_resvalue, + l_sizet_const(0), ""), + ""); + + LLVMBuildCondBr(b, + v_nullorfalse, + opblocks[op->d.jump.jumpdone], + opblocks[opno + 1]); + break; + } + + case EEOP_NULLTEST_ISNULL: + { + LLVMValueRef v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + LLVMValueRef v_resvalue; + + v_resvalue = + LLVMBuildSelect(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + l_sizet_const(1), + l_sizet_const(0), + ""); + LLVMBuildStore(b, v_resvalue, v_resvaluep); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_NULLTEST_ISNOTNULL: + { + LLVMValueRef v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + LLVMValueRef v_resvalue; + + v_resvalue = + LLVMBuildSelect(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + l_sizet_const(0), + l_sizet_const(1), + ""); + LLVMBuildStore(b, v_resvalue, v_resvaluep); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_NULLTEST_ROWISNULL: + build_EvalXFunc(b, mod, "ExecEvalRowNull", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_NULLTEST_ROWISNOTNULL: + build_EvalXFunc(b, mod, "ExecEvalRowNotNull", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_BOOLTEST_IS_TRUE: + case EEOP_BOOLTEST_IS_NOT_FALSE: + case EEOP_BOOLTEST_IS_FALSE: + case EEOP_BOOLTEST_IS_NOT_TRUE: + { + LLVMBasicBlockRef b_isnull, + b_notnull; + LLVMValueRef v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + + b_isnull = l_bb_before_v(opblocks[opno + 1], + "op.%d.isnull", opno); + b_notnull = l_bb_before_v(opblocks[opno + 1], + "op.%d.isnotnull", opno); + + /* check if value is NULL */ + 
LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + b_isnull, b_notnull); + + /* if value is NULL, return false */ + LLVMPositionBuilderAtEnd(b, b_isnull); + + /* result is not null */ + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + + if (opcode == EEOP_BOOLTEST_IS_TRUE || + opcode == EEOP_BOOLTEST_IS_FALSE) + { + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + } + else + { + LLVMBuildStore(b, l_sizet_const(1), v_resvaluep); + } + + LLVMBuildBr(b, opblocks[opno + 1]); + + LLVMPositionBuilderAtEnd(b, b_notnull); + + if (opcode == EEOP_BOOLTEST_IS_TRUE || + opcode == EEOP_BOOLTEST_IS_NOT_FALSE) + { + /* + * if value is not null NULL, return value (already + * set) + */ + } + else + { + LLVMValueRef v_value = + LLVMBuildLoad(b, v_resvaluep, ""); + + v_value = LLVMBuildZExt(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_value, + l_sizet_const(0), + ""), + TypeSizeT, ""); + LLVMBuildStore(b, v_value, v_resvaluep); + } + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_PARAM_EXEC: + build_EvalXFunc(b, mod, "ExecEvalParamExec", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_PARAM_EXTERN: + build_EvalXFunc(b, mod, "ExecEvalParamExtern", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_PARAM_CALLBACK: + { + LLVMTypeRef v_functype; + LLVMValueRef v_func; + LLVMValueRef v_params[3]; + + v_functype = llvm_pg_var_func_type("TypeExecEvalSubroutine"); + v_func = l_ptr_const(op->d.cparam.paramfunc, + LLVMPointerType(v_functype, 0)); + + v_params[0] = v_state; + v_params[1] = l_ptr_const(op, l_ptr(StructExprEvalStep)); + v_params[2] = v_econtext; + LLVMBuildCall(b, + v_func, + v_params, lengthof(v_params), ""); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_SBSREF_SUBSCRIPTS: + { + int jumpdone = op->d.sbsref_subscript.jumpdone; + LLVMTypeRef v_functype; + LLVMValueRef v_func; + LLVMValueRef v_params[3]; + LLVMValueRef v_ret; + + 
v_functype = llvm_pg_var_func_type("TypeExecEvalBoolSubroutine"); + v_func = l_ptr_const(op->d.sbsref_subscript.subscriptfunc, + LLVMPointerType(v_functype, 0)); + + v_params[0] = v_state; + v_params[1] = l_ptr_const(op, l_ptr(StructExprEvalStep)); + v_params[2] = v_econtext; + v_ret = LLVMBuildCall(b, + v_func, + v_params, lengthof(v_params), ""); + v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_ret, + l_sbool_const(1), ""), + opblocks[opno + 1], + opblocks[jumpdone]); + break; + } + + case EEOP_SBSREF_OLD: + case EEOP_SBSREF_ASSIGN: + case EEOP_SBSREF_FETCH: + { + LLVMTypeRef v_functype; + LLVMValueRef v_func; + LLVMValueRef v_params[3]; + + v_functype = llvm_pg_var_func_type("TypeExecEvalSubroutine"); + v_func = l_ptr_const(op->d.sbsref.subscriptfunc, + LLVMPointerType(v_functype, 0)); + + v_params[0] = v_state; + v_params[1] = l_ptr_const(op, l_ptr(StructExprEvalStep)); + v_params[2] = v_econtext; + LLVMBuildCall(b, + v_func, + v_params, lengthof(v_params), ""); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_CASE_TESTVAL: + { + LLVMBasicBlockRef b_avail, + b_notavail; + LLVMValueRef v_casevaluep, + v_casevalue; + LLVMValueRef v_casenullp, + v_casenull; + LLVMValueRef v_casevaluenull; + + b_avail = l_bb_before_v(opblocks[opno + 1], + "op.%d.avail", opno); + b_notavail = l_bb_before_v(opblocks[opno + 1], + "op.%d.notavail", opno); + + v_casevaluep = l_ptr_const(op->d.casetest.value, + l_ptr(TypeSizeT)); + v_casenullp = l_ptr_const(op->d.casetest.isnull, + l_ptr(TypeStorageBool)); + + v_casevaluenull = + LLVMBuildICmp(b, LLVMIntEQ, + LLVMBuildPtrToInt(b, v_casevaluep, + TypeSizeT, ""), + l_sizet_const(0), ""); + LLVMBuildCondBr(b, v_casevaluenull, b_notavail, b_avail); + + /* if casetest != NULL */ + LLVMPositionBuilderAtEnd(b, b_avail); + v_casevalue = LLVMBuildLoad(b, v_casevaluep, ""); + v_casenull = LLVMBuildLoad(b, v_casenullp, ""); + LLVMBuildStore(b, v_casevalue, v_resvaluep); 
+ LLVMBuildStore(b, v_casenull, v_resnullp); + LLVMBuildBr(b, opblocks[opno + 1]); + + /* if casetest == NULL */ + LLVMPositionBuilderAtEnd(b, b_notavail); + v_casevalue = + l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_CASEDATUM, ""); + v_casenull = + l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_CASENULL, ""); + LLVMBuildStore(b, v_casevalue, v_resvaluep); + LLVMBuildStore(b, v_casenull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_MAKE_READONLY: + { + LLVMBasicBlockRef b_notnull; + LLVMValueRef v_params[1]; + LLVMValueRef v_ret; + LLVMValueRef v_nullp; + LLVMValueRef v_valuep; + LLVMValueRef v_null; + LLVMValueRef v_value; + + b_notnull = l_bb_before_v(opblocks[opno + 1], + "op.%d.readonly.notnull", opno); + + v_nullp = l_ptr_const(op->d.make_readonly.isnull, + l_ptr(TypeStorageBool)); + + v_null = LLVMBuildLoad(b, v_nullp, ""); + + /* store null isnull value in result */ + LLVMBuildStore(b, v_null, v_resnullp); + + /* check if value is NULL */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_null, + l_sbool_const(1), ""), + opblocks[opno + 1], b_notnull); + + /* if value is not null, convert to RO datum */ + LLVMPositionBuilderAtEnd(b, b_notnull); + + v_valuep = l_ptr_const(op->d.make_readonly.value, + l_ptr(TypeSizeT)); + + v_value = LLVMBuildLoad(b, v_valuep, ""); + + v_params[0] = v_value; + v_ret = + LLVMBuildCall(b, + llvm_pg_func(mod, "MakeExpandedObjectReadOnlyInternal"), + v_params, lengthof(v_params), ""); + LLVMBuildStore(b, v_ret, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_IOCOERCE: + { + FunctionCallInfo fcinfo_out, + fcinfo_in; + LLVMValueRef v_fn_out, + v_fn_in; + LLVMValueRef v_fcinfo_out, + v_fcinfo_in; + LLVMValueRef v_fcinfo_in_isnullp; + LLVMValueRef v_retval; + LLVMValueRef v_resvalue; + LLVMValueRef v_resnull; + + LLVMValueRef v_output_skip; + LLVMValueRef v_output; + + LLVMBasicBlockRef b_skipoutput; + LLVMBasicBlockRef b_calloutput; + 
LLVMBasicBlockRef b_input; + LLVMBasicBlockRef b_inputcall; + + fcinfo_out = op->d.iocoerce.fcinfo_data_out; + fcinfo_in = op->d.iocoerce.fcinfo_data_in; + + b_skipoutput = l_bb_before_v(opblocks[opno + 1], + "op.%d.skipoutputnull", opno); + b_calloutput = l_bb_before_v(opblocks[opno + 1], + "op.%d.calloutput", opno); + b_input = l_bb_before_v(opblocks[opno + 1], + "op.%d.input", opno); + b_inputcall = l_bb_before_v(opblocks[opno + 1], + "op.%d.inputcall", opno); + + v_fn_out = llvm_function_reference(context, b, mod, fcinfo_out); + v_fn_in = llvm_function_reference(context, b, mod, fcinfo_in); + v_fcinfo_out = l_ptr_const(fcinfo_out, l_ptr(StructFunctionCallInfoData)); + v_fcinfo_in = l_ptr_const(fcinfo_in, l_ptr(StructFunctionCallInfoData)); + + v_fcinfo_in_isnullp = + LLVMBuildStructGEP(b, v_fcinfo_in, + FIELDNO_FUNCTIONCALLINFODATA_ISNULL, + "v_fcinfo_in_isnull"); + + /* output functions are not called on nulls */ + v_resnull = LLVMBuildLoad(b, v_resnullp, ""); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_resnull, + l_sbool_const(1), ""), + b_skipoutput, + b_calloutput); + + LLVMPositionBuilderAtEnd(b, b_skipoutput); + v_output_skip = l_sizet_const(0); + LLVMBuildBr(b, b_input); + + LLVMPositionBuilderAtEnd(b, b_calloutput); + v_resvalue = LLVMBuildLoad(b, v_resvaluep, ""); + + /* set arg[0] */ + LLVMBuildStore(b, + v_resvalue, + l_funcvaluep(b, v_fcinfo_out, 0)); + LLVMBuildStore(b, + l_sbool_const(0), + l_funcnullp(b, v_fcinfo_out, 0)); + /* and call output function (can never return NULL) */ + v_output = LLVMBuildCall(b, v_fn_out, &v_fcinfo_out, + 1, "funccall_coerce_out"); + LLVMBuildBr(b, b_input); + + /* build block handling input function call */ + LLVMPositionBuilderAtEnd(b, b_input); + + /* phi between resnull and output function call branches */ + { + LLVMValueRef incoming_values[2]; + LLVMBasicBlockRef incoming_blocks[2]; + + incoming_values[0] = v_output_skip; + incoming_blocks[0] = b_skipoutput; + + incoming_values[1] = v_output; + 
incoming_blocks[1] = b_calloutput; + + v_output = LLVMBuildPhi(b, TypeSizeT, "output"); + LLVMAddIncoming(v_output, + incoming_values, incoming_blocks, + lengthof(incoming_blocks)); + } + + /* + * If input function is strict, skip if input string is + * NULL. + */ + if (op->d.iocoerce.finfo_in->fn_strict) + { + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_output, + l_sizet_const(0), ""), + opblocks[opno + 1], + b_inputcall); + } + else + { + LLVMBuildBr(b, b_inputcall); + } + + LLVMPositionBuilderAtEnd(b, b_inputcall); + /* set arguments */ + /* arg0: output */ + LLVMBuildStore(b, v_output, + l_funcvaluep(b, v_fcinfo_in, 0)); + LLVMBuildStore(b, v_resnull, + l_funcnullp(b, v_fcinfo_in, 0)); + + /* arg1: ioparam: preset in execExpr.c */ + /* arg2: typmod: preset in execExpr.c */ + + /* reset fcinfo_in->isnull */ + LLVMBuildStore(b, l_sbool_const(0), v_fcinfo_in_isnullp); + /* and call function */ + v_retval = LLVMBuildCall(b, v_fn_in, &v_fcinfo_in, 1, + "funccall_iocoerce_in"); + + LLVMBuildStore(b, v_retval, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_DISTINCT: + case EEOP_NOT_DISTINCT: + { + FunctionCallInfo fcinfo = op->d.func.fcinfo_data; + + LLVMValueRef v_fcinfo; + LLVMValueRef v_fcinfo_isnull; + + LLVMValueRef v_argnull0, + v_argisnull0; + LLVMValueRef v_argnull1, + v_argisnull1; + + LLVMValueRef v_anyargisnull; + LLVMValueRef v_bothargisnull; + + LLVMValueRef v_result; + + LLVMBasicBlockRef b_noargnull; + LLVMBasicBlockRef b_checkbothargnull; + LLVMBasicBlockRef b_bothargnull; + LLVMBasicBlockRef b_anyargnull; + + b_noargnull = l_bb_before_v(opblocks[opno + 1], "op.%d.noargnull", opno); + b_checkbothargnull = l_bb_before_v(opblocks[opno + 1], "op.%d.checkbothargnull", opno); + b_bothargnull = l_bb_before_v(opblocks[opno + 1], "op.%d.bothargnull", opno); + b_anyargnull = l_bb_before_v(opblocks[opno + 1], "op.%d.anyargnull", opno); + + v_fcinfo = l_ptr_const(fcinfo, l_ptr(StructFunctionCallInfoData)); + + /* load 
args[0|1].isnull for both arguments */ + v_argnull0 = l_funcnull(b, v_fcinfo, 0); + v_argisnull0 = LLVMBuildICmp(b, LLVMIntEQ, v_argnull0, + l_sbool_const(1), ""); + v_argnull1 = l_funcnull(b, v_fcinfo, 1); + v_argisnull1 = LLVMBuildICmp(b, LLVMIntEQ, v_argnull1, + l_sbool_const(1), ""); + + v_anyargisnull = LLVMBuildOr(b, v_argisnull0, v_argisnull1, ""); + v_bothargisnull = LLVMBuildAnd(b, v_argisnull0, v_argisnull1, ""); + + /* + * Check function arguments for NULLness: If either is + * NULL, we check if both args are NULL. Otherwise call + * comparator. + */ + LLVMBuildCondBr(b, v_anyargisnull, b_checkbothargnull, + b_noargnull); + + /* + * build block checking if any arg is null + */ + LLVMPositionBuilderAtEnd(b, b_checkbothargnull); + LLVMBuildCondBr(b, v_bothargisnull, b_bothargnull, + b_anyargnull); + + + /* Both NULL? Then is not distinct... */ + LLVMPositionBuilderAtEnd(b, b_bothargnull); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + if (opcode == EEOP_NOT_DISTINCT) + LLVMBuildStore(b, l_sizet_const(1), v_resvaluep); + else + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + + /* Only one is NULL? Then is distinct... 
*/ + LLVMPositionBuilderAtEnd(b, b_anyargnull); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + if (opcode == EEOP_NOT_DISTINCT) + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + else + LLVMBuildStore(b, l_sizet_const(1), v_resvaluep); + LLVMBuildBr(b, opblocks[opno + 1]); + + /* neither argument is null: compare */ + LLVMPositionBuilderAtEnd(b, b_noargnull); + + v_result = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + + if (opcode == EEOP_DISTINCT) + { + /* Must invert result of "=" */ + v_result = + LLVMBuildZExt(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_result, + l_sizet_const(0), ""), + TypeSizeT, ""); + } + + LLVMBuildStore(b, v_fcinfo_isnull, v_resnullp); + LLVMBuildStore(b, v_result, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_NULLIF: + { + FunctionCallInfo fcinfo = op->d.func.fcinfo_data; + + LLVMValueRef v_fcinfo; + LLVMValueRef v_fcinfo_isnull; + LLVMValueRef v_argnull0; + LLVMValueRef v_argnull1; + LLVMValueRef v_anyargisnull; + LLVMValueRef v_arg0; + LLVMBasicBlockRef b_hasnull; + LLVMBasicBlockRef b_nonull; + LLVMBasicBlockRef b_argsequal; + LLVMValueRef v_retval; + LLVMValueRef v_argsequal; + + b_hasnull = l_bb_before_v(opblocks[opno + 1], + "b.%d.null-args", opno); + b_nonull = l_bb_before_v(opblocks[opno + 1], + "b.%d.no-null-args", opno); + b_argsequal = l_bb_before_v(opblocks[opno + 1], + "b.%d.argsequal", opno); + + v_fcinfo = l_ptr_const(fcinfo, l_ptr(StructFunctionCallInfoData)); + + /* if either argument is NULL they can't be equal */ + v_argnull0 = l_funcnull(b, v_fcinfo, 0); + v_argnull1 = l_funcnull(b, v_fcinfo, 1); + + v_anyargisnull = + LLVMBuildOr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_argnull0, + l_sbool_const(1), ""), + LLVMBuildICmp(b, LLVMIntEQ, v_argnull1, + l_sbool_const(1), ""), + ""); + + LLVMBuildCondBr(b, v_anyargisnull, b_hasnull, b_nonull); + + /* one (or both) of the arguments are null, return arg[0] */ + LLVMPositionBuilderAtEnd(b, b_hasnull); + v_arg0 = l_funcvalue(b, 
v_fcinfo, 0); + LLVMBuildStore(b, v_argnull0, v_resnullp); + LLVMBuildStore(b, v_arg0, v_resvaluep); + LLVMBuildBr(b, opblocks[opno + 1]); + + /* build block to invoke function and check result */ + LLVMPositionBuilderAtEnd(b, b_nonull); + + v_retval = BuildV1Call(context, b, mod, fcinfo, &v_fcinfo_isnull); + + /* + * If result not null, and arguments are equal return null + * (same result as if there'd been NULLs, hence reuse + * b_hasnull). + */ + v_argsequal = LLVMBuildAnd(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_fcinfo_isnull, + l_sbool_const(0), + ""), + LLVMBuildICmp(b, LLVMIntEQ, + v_retval, + l_sizet_const(1), + ""), + ""); + LLVMBuildCondBr(b, v_argsequal, b_argsequal, b_hasnull); + + /* build block setting result to NULL, if args are equal */ + LLVMPositionBuilderAtEnd(b, b_argsequal); + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + LLVMBuildStore(b, v_retval, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_SQLVALUEFUNCTION: + build_EvalXFunc(b, mod, "ExecEvalSQLValueFunction", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_CURRENTOFEXPR: + build_EvalXFunc(b, mod, "ExecEvalCurrentOfExpr", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_NEXTVALUEEXPR: + build_EvalXFunc(b, mod, "ExecEvalNextValueExpr", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_ARRAYEXPR: + build_EvalXFunc(b, mod, "ExecEvalArrayExpr", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_ARRAYCOERCE: + build_EvalXFunc(b, mod, "ExecEvalArrayCoerce", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_ROW: + build_EvalXFunc(b, mod, "ExecEvalRow", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_ROWCOMPARE_STEP: + { + FunctionCallInfo fcinfo = op->d.rowcompare_step.fcinfo_data; + LLVMValueRef v_fcinfo_isnull; + LLVMBasicBlockRef 
b_null; + LLVMBasicBlockRef b_compare; + LLVMBasicBlockRef b_compare_result; + + LLVMValueRef v_retval; + + b_null = l_bb_before_v(opblocks[opno + 1], + "op.%d.row-null", opno); + b_compare = l_bb_before_v(opblocks[opno + 1], + "op.%d.row-compare", opno); + b_compare_result = + l_bb_before_v(opblocks[opno + 1], + "op.%d.row-compare-result", + opno); + + /* + * If function is strict, and either arg is null, we're + * done. + */ + if (op->d.rowcompare_step.finfo->fn_strict) + { + LLVMValueRef v_fcinfo; + LLVMValueRef v_argnull0; + LLVMValueRef v_argnull1; + LLVMValueRef v_anyargisnull; + + v_fcinfo = l_ptr_const(fcinfo, + l_ptr(StructFunctionCallInfoData)); + + v_argnull0 = l_funcnull(b, v_fcinfo, 0); + v_argnull1 = l_funcnull(b, v_fcinfo, 1); + + v_anyargisnull = + LLVMBuildOr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_argnull0, + l_sbool_const(1), + ""), + LLVMBuildICmp(b, LLVMIntEQ, + v_argnull1, + l_sbool_const(1), ""), + ""); + + LLVMBuildCondBr(b, v_anyargisnull, b_null, b_compare); + } + else + { + LLVMBuildBr(b, b_compare); + } + + /* build block invoking comparison function */ + LLVMPositionBuilderAtEnd(b, b_compare); + + /* call function */ + v_retval = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + LLVMBuildStore(b, v_retval, v_resvaluep); + + /* if result of function is NULL, force NULL result */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_fcinfo_isnull, + l_sbool_const(0), + ""), + b_compare_result, + b_null); + + /* build block analyzing the !NULL comparator result */ + LLVMPositionBuilderAtEnd(b, b_compare_result); + + /* if results equal, compare next, otherwise done */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_retval, + l_sizet_const(0), ""), + opblocks[opno + 1], + opblocks[op->d.rowcompare_step.jumpdone]); + + /* + * Build block handling NULL input or NULL comparator + * result. 
+ */ + LLVMPositionBuilderAtEnd(b, b_null); + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + LLVMBuildBr(b, opblocks[op->d.rowcompare_step.jumpnull]); + + break; + } + + case EEOP_ROWCOMPARE_FINAL: + { + RowCompareType rctype = op->d.rowcompare_final.rctype; + + LLVMValueRef v_cmpresult; + LLVMValueRef v_result; + LLVMIntPredicate predicate; + + /* + * Btree comparators return 32 bit results, need to be + * careful about sign (used as a 64 bit value it's + * otherwise wrong). + */ + v_cmpresult = + LLVMBuildTrunc(b, + LLVMBuildLoad(b, v_resvaluep, ""), + LLVMInt32Type(), ""); + + switch (rctype) + { + case ROWCOMPARE_LT: + predicate = LLVMIntSLT; + break; + case ROWCOMPARE_LE: + predicate = LLVMIntSLE; + break; + case ROWCOMPARE_GT: + predicate = LLVMIntSGT; + break; + case ROWCOMPARE_GE: + predicate = LLVMIntSGE; + break; + default: + /* EQ and NE cases aren't allowed here */ + Assert(false); + predicate = 0; /* prevent compiler warning */ + break; + } + + v_result = LLVMBuildICmp(b, + predicate, + v_cmpresult, + l_int32_const(0), + ""); + v_result = LLVMBuildZExt(b, v_result, TypeSizeT, ""); + + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + LLVMBuildStore(b, v_result, v_resvaluep); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_MINMAX: + build_EvalXFunc(b, mod, "ExecEvalMinMax", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_FIELDSELECT: + build_EvalXFunc(b, mod, "ExecEvalFieldSelect", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_FIELDSTORE_DEFORM: + build_EvalXFunc(b, mod, "ExecEvalFieldStoreDeForm", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_FIELDSTORE_FORM: + build_EvalXFunc(b, mod, "ExecEvalFieldStoreForm", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_DOMAIN_TESTVAL: + { + LLVMBasicBlockRef b_avail, + b_notavail; + LLVMValueRef v_casevaluep, + v_casevalue; + 
LLVMValueRef v_casenullp, + v_casenull; + LLVMValueRef v_casevaluenull; + + b_avail = l_bb_before_v(opblocks[opno + 1], + "op.%d.avail", opno); + b_notavail = l_bb_before_v(opblocks[opno + 1], + "op.%d.notavail", opno); + + v_casevaluep = l_ptr_const(op->d.casetest.value, + l_ptr(TypeSizeT)); + v_casenullp = l_ptr_const(op->d.casetest.isnull, + l_ptr(TypeStorageBool)); + + v_casevaluenull = + LLVMBuildICmp(b, LLVMIntEQ, + LLVMBuildPtrToInt(b, v_casevaluep, + TypeSizeT, ""), + l_sizet_const(0), ""); + LLVMBuildCondBr(b, + v_casevaluenull, + b_notavail, b_avail); + + /* if casetest != NULL */ + LLVMPositionBuilderAtEnd(b, b_avail); + v_casevalue = LLVMBuildLoad(b, v_casevaluep, ""); + v_casenull = LLVMBuildLoad(b, v_casenullp, ""); + LLVMBuildStore(b, v_casevalue, v_resvaluep); + LLVMBuildStore(b, v_casenull, v_resnullp); + LLVMBuildBr(b, opblocks[opno + 1]); + + /* if casetest == NULL */ + LLVMPositionBuilderAtEnd(b, b_notavail); + v_casevalue = + l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_DOMAINDATUM, + ""); + v_casenull = + l_load_struct_gep(b, v_econtext, + FIELDNO_EXPRCONTEXT_DOMAINNULL, + ""); + LLVMBuildStore(b, v_casevalue, v_resvaluep); + LLVMBuildStore(b, v_casenull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_DOMAIN_NOTNULL: + build_EvalXFunc(b, mod, "ExecEvalConstraintNotNull", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_DOMAIN_CHECK: + build_EvalXFunc(b, mod, "ExecEvalConstraintCheck", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_CONVERT_ROWTYPE: + build_EvalXFunc(b, mod, "ExecEvalConvertRowtype", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_SCALARARRAYOP: + build_EvalXFunc(b, mod, "ExecEvalScalarArrayOp", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_HASHED_SCALARARRAYOP: + build_EvalXFunc(b, mod, "ExecEvalHashedScalarArrayOp", + v_state, op, v_econtext); + 
LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_XMLEXPR: + build_EvalXFunc(b, mod, "ExecEvalXmlExpr", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_AGGREF: + { + LLVMValueRef v_aggno; + LLVMValueRef value, + isnull; + + v_aggno = l_int32_const(op->d.aggref.aggno); + + /* load agg value / null */ + value = l_load_gep1(b, v_aggvalues, v_aggno, "aggvalue"); + isnull = l_load_gep1(b, v_aggnulls, v_aggno, "aggnull"); + + /* and store result */ + LLVMBuildStore(b, value, v_resvaluep); + LLVMBuildStore(b, isnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_GROUPING_FUNC: + build_EvalXFunc(b, mod, "ExecEvalGroupingFunc", + v_state, op); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_WINDOW_FUNC: + { + WindowFuncExprState *wfunc = op->d.window_func.wfstate; + LLVMValueRef v_wfuncnop; + LLVMValueRef v_wfuncno; + LLVMValueRef value, + isnull; + + /* + * At this point aggref->wfuncno is not yet set (it's set + * up in ExecInitWindowAgg() after initializing the + * expression). So load it from memory each time round. 
+ */ + v_wfuncnop = l_ptr_const(&wfunc->wfuncno, + l_ptr(LLVMInt32Type())); + v_wfuncno = LLVMBuildLoad(b, v_wfuncnop, "v_wfuncno"); + + /* load window func value / null */ + value = l_load_gep1(b, v_aggvalues, v_wfuncno, + "windowvalue"); + isnull = l_load_gep1(b, v_aggnulls, v_wfuncno, + "windownull"); + + LLVMBuildStore(b, value, v_resvaluep); + LLVMBuildStore(b, isnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_SUBPLAN: + build_EvalXFunc(b, mod, "ExecEvalSubPlan", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_AGG_STRICT_DESERIALIZE: + case EEOP_AGG_DESERIALIZE: + { + AggState *aggstate; + FunctionCallInfo fcinfo = op->d.agg_deserialize.fcinfo_data; + + LLVMValueRef v_retval; + LLVMValueRef v_fcinfo_isnull; + LLVMValueRef v_tmpcontext; + LLVMValueRef v_oldcontext; + + if (opcode == EEOP_AGG_STRICT_DESERIALIZE) + { + LLVMValueRef v_fcinfo; + LLVMValueRef v_argnull0; + LLVMBasicBlockRef b_deserialize; + + b_deserialize = l_bb_before_v(opblocks[opno + 1], + "op.%d.deserialize", opno); + + v_fcinfo = l_ptr_const(fcinfo, + l_ptr(StructFunctionCallInfoData)); + v_argnull0 = l_funcnull(b, v_fcinfo, 0); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_argnull0, + l_sbool_const(1), + ""), + opblocks[op->d.agg_deserialize.jumpnull], + b_deserialize); + LLVMPositionBuilderAtEnd(b, b_deserialize); + } + + aggstate = castNode(AggState, state->parent); + fcinfo = op->d.agg_deserialize.fcinfo_data; + + v_tmpcontext = + l_ptr_const(aggstate->tmpcontext->ecxt_per_tuple_memory, + l_ptr(StructMemoryContextData)); + v_oldcontext = l_mcxt_switch(mod, b, v_tmpcontext); + v_retval = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + l_mcxt_switch(mod, b, v_oldcontext); + + LLVMBuildStore(b, v_retval, v_resvaluep); + LLVMBuildStore(b, v_fcinfo_isnull, v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_AGG_STRICT_INPUT_CHECK_ARGS: + case 
EEOP_AGG_STRICT_INPUT_CHECK_NULLS: + { + int nargs = op->d.agg_strict_input_check.nargs; + NullableDatum *args = op->d.agg_strict_input_check.args; + bool *nulls = op->d.agg_strict_input_check.nulls; + int jumpnull; + + LLVMValueRef v_argsp; + LLVMValueRef v_nullsp; + LLVMBasicBlockRef *b_checknulls; + + Assert(nargs > 0); + + jumpnull = op->d.agg_strict_input_check.jumpnull; + v_argsp = l_ptr_const(args, l_ptr(StructNullableDatum)); + v_nullsp = l_ptr_const(nulls, l_ptr(TypeStorageBool)); + + /* create blocks for checking args */ + b_checknulls = palloc(sizeof(LLVMBasicBlockRef *) * nargs); + for (int argno = 0; argno < nargs; argno++) + { + b_checknulls[argno] = + l_bb_before_v(opblocks[opno + 1], + "op.%d.check-null.%d", + opno, argno); + } + + LLVMBuildBr(b, b_checknulls[0]); + + /* strict function, check for NULL args */ + for (int argno = 0; argno < nargs; argno++) + { + LLVMValueRef v_argno = l_int32_const(argno); + LLVMValueRef v_argisnull; + LLVMBasicBlockRef b_argnotnull; + + LLVMPositionBuilderAtEnd(b, b_checknulls[argno]); + + if (argno + 1 == nargs) + b_argnotnull = opblocks[opno + 1]; + else + b_argnotnull = b_checknulls[argno + 1]; + + if (opcode == EEOP_AGG_STRICT_INPUT_CHECK_NULLS) + v_argisnull = l_load_gep1(b, v_nullsp, v_argno, ""); + else + { + LLVMValueRef v_argn; + + v_argn = LLVMBuildGEP(b, v_argsp, &v_argno, 1, ""); + v_argisnull = + l_load_struct_gep(b, v_argn, + FIELDNO_NULLABLE_DATUM_ISNULL, + ""); + } + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_argisnull, + l_sbool_const(1), ""), + opblocks[jumpnull], + b_argnotnull); + } + + break; + } + + case EEOP_AGG_PLAIN_PERGROUP_NULLCHECK: + { + int jumpnull; + LLVMValueRef v_aggstatep; + LLVMValueRef v_allpergroupsp; + LLVMValueRef v_pergroup_allaggs; + LLVMValueRef v_setoff; + + jumpnull = op->d.agg_plain_pergroup_nullcheck.jumpnull; + + /* + * pergroup_allaggs = aggstate->all_pergroups + * [op->d.agg_plain_pergroup_nullcheck.setoff]; + */ + v_aggstatep = LLVMBuildBitCast(b, 
v_parent, + l_ptr(StructAggState), ""); + + v_allpergroupsp = l_load_struct_gep(b, v_aggstatep, + FIELDNO_AGGSTATE_ALL_PERGROUPS, + "aggstate.all_pergroups"); + + v_setoff = l_int32_const(op->d.agg_plain_pergroup_nullcheck.setoff); + + v_pergroup_allaggs = l_load_gep1(b, v_allpergroupsp, v_setoff, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, + LLVMBuildPtrToInt(b, v_pergroup_allaggs, TypeSizeT, ""), + l_sizet_const(0), ""), + opblocks[jumpnull], + opblocks[opno + 1]); + break; + } + + case EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL: + case EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL: + case EEOP_AGG_PLAIN_TRANS_BYVAL: + case EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF: + case EEOP_AGG_PLAIN_TRANS_STRICT_BYREF: + case EEOP_AGG_PLAIN_TRANS_BYREF: + { + AggState *aggstate; + AggStatePerTrans pertrans; + FunctionCallInfo fcinfo; + + LLVMValueRef v_aggstatep; + LLVMValueRef v_fcinfo; + LLVMValueRef v_fcinfo_isnull; + + LLVMValueRef v_transvaluep; + LLVMValueRef v_transnullp; + + LLVMValueRef v_setoff; + LLVMValueRef v_transno; + + LLVMValueRef v_aggcontext; + + LLVMValueRef v_allpergroupsp; + LLVMValueRef v_current_setp; + LLVMValueRef v_current_pertransp; + LLVMValueRef v_curaggcontext; + + LLVMValueRef v_pertransp; + + LLVMValueRef v_pergroupp; + + LLVMValueRef v_retval; + + LLVMValueRef v_tmpcontext; + LLVMValueRef v_oldcontext; + + aggstate = castNode(AggState, state->parent); + pertrans = op->d.agg_trans.pertrans; + + fcinfo = pertrans->transfn_fcinfo; + + v_aggstatep = + LLVMBuildBitCast(b, v_parent, l_ptr(StructAggState), ""); + v_pertransp = l_ptr_const(pertrans, + l_ptr(StructAggStatePerTransData)); + + /* + * pergroup = &aggstate->all_pergroups + * [op->d.agg_strict_trans_check.setoff] + * [op->d.agg_init_trans_check.transno]; + */ + v_allpergroupsp = + l_load_struct_gep(b, v_aggstatep, + FIELDNO_AGGSTATE_ALL_PERGROUPS, + "aggstate.all_pergroups"); + v_setoff = l_int32_const(op->d.agg_trans.setoff); + v_transno = l_int32_const(op->d.agg_trans.transno); + v_pergroupp 
= + LLVMBuildGEP(b, + l_load_gep1(b, v_allpergroupsp, v_setoff, ""), + &v_transno, 1, ""); + + + if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL || + opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF) + { + LLVMValueRef v_notransvalue; + LLVMBasicBlockRef b_init; + LLVMBasicBlockRef b_no_init; + + v_notransvalue = + l_load_struct_gep(b, v_pergroupp, + FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE, + "notransvalue"); + + b_init = l_bb_before_v(opblocks[opno + 1], + "op.%d.inittrans", opno); + b_no_init = l_bb_before_v(opblocks[opno + 1], + "op.%d.no_inittrans", opno); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_notransvalue, + l_sbool_const(1), ""), + b_init, + b_no_init); + + /* block to init the transition value if necessary */ + { + LLVMValueRef params[4]; + + LLVMPositionBuilderAtEnd(b, b_init); + + v_aggcontext = l_ptr_const(op->d.agg_trans.aggcontext, + l_ptr(StructExprContext)); + + params[0] = v_aggstatep; + params[1] = v_pertransp; + params[2] = v_pergroupp; + params[3] = v_aggcontext; + + LLVMBuildCall(b, + llvm_pg_func(mod, "ExecAggInitGroup"), + params, lengthof(params), + ""); + + LLVMBuildBr(b, opblocks[opno + 1]); + + } + + LLVMPositionBuilderAtEnd(b, b_no_init); + } + + if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL || + opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF || + opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL || + opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYREF) + { + LLVMValueRef v_transnull; + LLVMBasicBlockRef b_strictpass; + + b_strictpass = l_bb_before_v(opblocks[opno + 1], + "op.%d.strictpass", opno); + v_transnull = + l_load_struct_gep(b, v_pergroupp, + FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL, + "transnull"); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_transnull, + l_sbool_const(1), ""), + opblocks[opno + 1], + b_strictpass); + + LLVMPositionBuilderAtEnd(b, b_strictpass); + } + + + v_fcinfo = l_ptr_const(fcinfo, + l_ptr(StructFunctionCallInfoData)); + v_aggcontext = l_ptr_const(op->d.agg_trans.aggcontext, 
+ l_ptr(StructExprContext)); + + v_current_setp = + LLVMBuildStructGEP(b, + v_aggstatep, + FIELDNO_AGGSTATE_CURRENT_SET, + "aggstate.current_set"); + v_curaggcontext = + LLVMBuildStructGEP(b, + v_aggstatep, + FIELDNO_AGGSTATE_CURAGGCONTEXT, + "aggstate.curaggcontext"); + v_current_pertransp = + LLVMBuildStructGEP(b, + v_aggstatep, + FIELDNO_AGGSTATE_CURPERTRANS, + "aggstate.curpertrans"); + + /* set aggstate globals */ + LLVMBuildStore(b, v_aggcontext, v_curaggcontext); + LLVMBuildStore(b, l_int32_const(op->d.agg_trans.setno), + v_current_setp); + LLVMBuildStore(b, v_pertransp, v_current_pertransp); + + /* invoke transition function in per-tuple context */ + v_tmpcontext = + l_ptr_const(aggstate->tmpcontext->ecxt_per_tuple_memory, + l_ptr(StructMemoryContextData)); + v_oldcontext = l_mcxt_switch(mod, b, v_tmpcontext); + + /* store transvalue in fcinfo->args[0] */ + v_transvaluep = + LLVMBuildStructGEP(b, v_pergroupp, + FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE, + "transvalue"); + v_transnullp = + LLVMBuildStructGEP(b, v_pergroupp, + FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL, + "transnullp"); + LLVMBuildStore(b, + LLVMBuildLoad(b, v_transvaluep, + "transvalue"), + l_funcvaluep(b, v_fcinfo, 0)); + LLVMBuildStore(b, + LLVMBuildLoad(b, v_transnullp, "transnull"), + l_funcnullp(b, v_fcinfo, 0)); + + /* and invoke transition function */ + v_retval = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + + /* + * For pass-by-ref datatype, must copy the new value into + * aggcontext and free the prior transValue. But if + * transfn returned a pointer to its first input, we don't + * need to do anything. Also, if transfn returned a + * pointer to a R/W expanded object that is already a + * child of the aggcontext, assume we can adopt that value + * without copying it. 
+ */ + if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF || + opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYREF || + opcode == EEOP_AGG_PLAIN_TRANS_BYREF) + { + LLVMBasicBlockRef b_call; + LLVMBasicBlockRef b_nocall; + LLVMValueRef v_fn; + LLVMValueRef v_transvalue; + LLVMValueRef v_transnull; + LLVMValueRef v_newval; + LLVMValueRef params[6]; + + b_call = l_bb_before_v(opblocks[opno + 1], + "op.%d.transcall", opno); + b_nocall = l_bb_before_v(opblocks[opno + 1], + "op.%d.transnocall", opno); + + v_transvalue = LLVMBuildLoad(b, v_transvaluep, ""); + v_transnull = LLVMBuildLoad(b, v_transnullp, ""); + + /* + * DatumGetPointer(newVal) != + * DatumGetPointer(pergroup->transValue)) + */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, + v_transvalue, + v_retval, ""), + b_nocall, b_call); + + /* returned datum not passed datum, reparent */ + LLVMPositionBuilderAtEnd(b, b_call); + + params[0] = v_aggstatep; + params[1] = v_pertransp; + params[2] = v_retval; + params[3] = LLVMBuildTrunc(b, v_fcinfo_isnull, + TypeParamBool, ""); + params[4] = v_transvalue; + params[5] = LLVMBuildTrunc(b, v_transnull, + TypeParamBool, ""); + + v_fn = llvm_pg_func(mod, "ExecAggTransReparent"); + v_newval = + LLVMBuildCall(b, v_fn, + params, lengthof(params), + ""); + + /* store trans value */ + LLVMBuildStore(b, v_newval, v_transvaluep); + LLVMBuildStore(b, v_fcinfo_isnull, v_transnullp); + + l_mcxt_switch(mod, b, v_oldcontext); + LLVMBuildBr(b, opblocks[opno + 1]); + + /* returned datum passed datum, no need to reparent */ + LLVMPositionBuilderAtEnd(b, b_nocall); + } + + /* store trans value */ + LLVMBuildStore(b, v_retval, v_transvaluep); + LLVMBuildStore(b, v_fcinfo_isnull, v_transnullp); + + l_mcxt_switch(mod, b, v_oldcontext); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_AGG_ORDERED_TRANS_DATUM: + build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_AGG_ORDERED_TRANS_TUPLE: + 
build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransTuple", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_LAST: + Assert(false); + break; + } + } + + LLVMDisposeBuilder(b); + + /* + * Don't immediately emit function, instead do so the first time the + * expression is actually evaluated. That allows to emit a lot of + * functions together, avoiding a lot of repeated llvm and memory + * remapping overhead. + */ + { + + CompiledExprState *cstate = palloc0(sizeof(CompiledExprState)); + + cstate->context = context; + cstate->funcname = funcname; + + state->evalfunc = ExecRunCompiledExpr; + state->evalfunc_private = cstate; + } + + llvm_leave_fatal_on_oom(); + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.instr.generation_counter, + endtime, starttime); + + return true; +} + +/* + * Run compiled expression. + * + * This will only be called the first time a JITed expression is called. We + * first make sure the expression is still up2date, and then get a pointer to + * the emitted function. The latter can be the first thing that triggers + * optimizing and emitting all the generated functions. 
+ */ +static Datum +ExecRunCompiledExpr(ExprState *state, ExprContext *econtext, bool *isNull) +{ + CompiledExprState *cstate = state->evalfunc_private; + ExprStateEvalFunc func; + + CheckExprStillValid(state, econtext); + + llvm_enter_fatal_on_oom(); + func = (ExprStateEvalFunc) llvm_get_function(cstate->context, + cstate->funcname); + llvm_leave_fatal_on_oom(); + Assert(func); + + /* remove indirection via this function for future calls */ + state->evalfunc = func; + + return func(state, econtext, isNull); +} + +static LLVMValueRef +BuildV1Call(LLVMJitContext *context, LLVMBuilderRef b, + LLVMModuleRef mod, FunctionCallInfo fcinfo, + LLVMValueRef *v_fcinfo_isnull) +{ + LLVMValueRef v_fn; + LLVMValueRef v_fcinfo_isnullp; + LLVMValueRef v_retval; + LLVMValueRef v_fcinfo; + + v_fn = llvm_function_reference(context, b, mod, fcinfo); + + v_fcinfo = l_ptr_const(fcinfo, l_ptr(StructFunctionCallInfoData)); + v_fcinfo_isnullp = LLVMBuildStructGEP(b, v_fcinfo, + FIELDNO_FUNCTIONCALLINFODATA_ISNULL, + "v_fcinfo_isnull"); + LLVMBuildStore(b, l_sbool_const(0), v_fcinfo_isnullp); + + v_retval = LLVMBuildCall(b, v_fn, &v_fcinfo, 1, "funccall"); + + if (v_fcinfo_isnull) + *v_fcinfo_isnull = LLVMBuildLoad(b, v_fcinfo_isnullp, ""); + + /* + * Add lifetime-end annotation, signaling that writes to memory don't have + * to be retained (important for inlining potential). + */ + { + LLVMValueRef v_lifetime = create_LifetimeEnd(mod); + LLVMValueRef params[2]; + + params[0] = l_int64_const(sizeof(NullableDatum) * fcinfo->nargs); + params[1] = l_ptr_const(fcinfo->args, l_ptr(LLVMInt8Type())); + LLVMBuildCall(b, v_lifetime, params, lengthof(params), ""); + + params[0] = l_int64_const(sizeof(fcinfo->isnull)); + params[1] = l_ptr_const(&fcinfo->isnull, l_ptr(LLVMInt8Type())); + LLVMBuildCall(b, v_lifetime, params, lengthof(params), ""); + } + + return v_retval; +} + +/* + * Implement an expression step by calling the function funcname. 
+ */ +static LLVMValueRef +build_EvalXFuncInt(LLVMBuilderRef b, LLVMModuleRef mod, const char *funcname, + LLVMValueRef v_state, ExprEvalStep *op, + int nargs, LLVMValueRef *v_args) +{ + LLVMValueRef v_fn = llvm_pg_func(mod, funcname); + LLVMValueRef *params; + int argno = 0; + LLVMValueRef v_ret; + + /* cheap pre-check as llvm just asserts out */ + if (LLVMCountParams(v_fn) != (nargs + 2)) + elog(ERROR, "parameter mismatch: %s expects %d passed %d", + funcname, LLVMCountParams(v_fn), nargs + 2); + + params = palloc(sizeof(LLVMValueRef) * (2 + nargs)); + + params[argno++] = v_state; + params[argno++] = l_ptr_const(op, l_ptr(StructExprEvalStep)); + + for (int i = 0; i < nargs; i++) + params[argno++] = v_args[i]; + + v_ret = LLVMBuildCall(b, v_fn, params, argno, ""); + + pfree(params); + + return v_ret; +} + +static LLVMValueRef +create_LifetimeEnd(LLVMModuleRef mod) +{ + LLVMTypeRef sig; + LLVMValueRef fn; + LLVMTypeRef param_types[2]; + + /* LLVM 5+ has a variadic pointer argument */ +#if LLVM_VERSION_MAJOR < 5 + const char *nm = "llvm.lifetime.end"; +#else + const char *nm = "llvm.lifetime.end.p0i8"; +#endif + + fn = LLVMGetNamedFunction(mod, nm); + if (fn) + return fn; + + param_types[0] = LLVMInt64Type(); + param_types[1] = l_ptr(LLVMInt8Type()); + + sig = LLVMFunctionType(LLVMVoidType(), + param_types, lengthof(param_types), + false); + fn = LLVMAddFunction(mod, nm, sig); + + LLVMSetFunctionCallConv(fn, LLVMCCallConv); + + Assert(LLVMGetIntrinsicID(fn)); + + return fn; +} diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp new file mode 100644 index 0000000..9bb4b67 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_inline.cpp @@ -0,0 +1,899 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_inline.cpp + * Cross module inlining suitable for postgres' JIT + * + * The inliner iterates over external functions referenced from the passed + * module and attempts to inline those. 
It does so by utilizing pre-built + * indexes over both postgres core code and extension modules. When a match + * for an external function is found - not guaranteed! - the index will then + * be used to judge their instruction count / inline worthiness. After doing + * so for all external functions, all the referenced functions (and + * prerequisites) will be imported. + * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_inline.cpp + * + *------------------------------------------------------------------------- + */ + +extern "C" +{ +#include "postgres.h" +} + +#include "jit/llvmjit.h" + +extern "C" +{ +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "common/string.h" +#include "miscadmin.h" +#include "storage/fd.h" +} + +#include <llvm-c/Core.h> +#include <llvm-c/BitReader.h> + +/* Avoid macro clash with LLVM's C++ headers */ +#undef Min + +#include <llvm/ADT/SetVector.h> +#include <llvm/ADT/StringSet.h> +#include <llvm/ADT/StringMap.h> +#include <llvm/Analysis/ModuleSummaryAnalysis.h> +#if LLVM_VERSION_MAJOR > 3 +#include <llvm/Bitcode/BitcodeReader.h> +#else +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Support/Error.h> +#endif +#include <llvm/IR/Attributes.h> +#include <llvm/IR/DebugInfo.h> +#include <llvm/IR/IntrinsicInst.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/IR/ModuleSummaryIndex.h> +#include <llvm/Linker/IRMover.h> +#include <llvm/Support/ManagedStatic.h> + + +/* + * Type used to represent modules InlineWorkListItem's subject is searched for + * in. + */ +typedef llvm::SmallVector<llvm::ModuleSummaryIndex *, 2> InlineSearchPath; + +/* + * Item in queue of to-be-checked symbols and corresponding queue. 
+ */ +typedef struct InlineWorkListItem +{ + llvm::StringRef symbolName; + llvm::SmallVector<llvm::ModuleSummaryIndex *, 2> searchpath; +} InlineWorkListItem; +typedef llvm::SmallVector<InlineWorkListItem, 128> InlineWorkList; + +/* + * Information about symbols processed during inlining. Used to prevent + * repeated searches and provide additional information. + */ +typedef struct FunctionInlineState +{ + int costLimit; + bool processed; + bool inlined; + bool allowReconsidering; +} FunctionInlineState; +typedef llvm::StringMap<FunctionInlineState> FunctionInlineStates; + +/* + * Map of modules that should be inlined, with a list of the to-be inlined + * symbols. + */ +typedef llvm::StringMap<llvm::StringSet<> > ImportMapTy; + + +const float inline_cost_decay_factor = 0.5; +const int inline_initial_cost = 150; + +/* + * These are managed statics so LLVM knows to deallocate them during an + * LLVMShutdown(), rather than after (which'd cause crashes). + */ +typedef llvm::StringMap<std::unique_ptr<llvm::Module> > ModuleCache; +llvm::ManagedStatic<ModuleCache> module_cache; +typedef llvm::StringMap<std::unique_ptr<llvm::ModuleSummaryIndex> > SummaryCache; +llvm::ManagedStatic<SummaryCache> summary_cache; + + +static std::unique_ptr<ImportMapTy> llvm_build_inline_plan(llvm::Module *mod); +static void llvm_execute_inline_plan(llvm::Module *mod, + ImportMapTy *globalsToInline); + +static llvm::Module* load_module_cached(llvm::StringRef modPath); +static std::unique_ptr<llvm::Module> load_module(llvm::StringRef Identifier); +static std::unique_ptr<llvm::ModuleSummaryIndex> llvm_load_summary(llvm::StringRef path); + + +static llvm::Function* create_redirection_function(std::unique_ptr<llvm::Module> &importMod, + llvm::Function *F, + llvm::StringRef Name); + +static bool function_inlinable(llvm::Function &F, + int threshold, + FunctionInlineStates &functionState, + InlineWorkList &worklist, + InlineSearchPath &searchpath, + llvm::SmallPtrSet<const llvm::Function *, 8> 
&visitedFunctions, + int &running_instcount, + llvm::StringSet<> &importVars); +static void function_references(llvm::Function &F, + int &running_instcount, + llvm::SmallPtrSet<llvm::GlobalVariable *, 8> &referencedVars, + llvm::SmallPtrSet<llvm::Function *, 8> &referencedFunctions); + +static void add_module_to_inline_search_path(InlineSearchPath& path, llvm::StringRef modpath); +static llvm::SmallVector<llvm::GlobalValueSummary *, 1> +summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid); + +/* verbose debugging for inliner development */ +/* #define INLINE_DEBUG */ +#ifdef INLINE_DEBUG +#define ilog elog +#else +#define ilog(...) (void) 0 +#endif + +/* + * Perform inlining of external function references in M based on a simple + * cost based analysis. + */ +void +llvm_inline(LLVMModuleRef M) +{ + llvm::Module *mod = llvm::unwrap(M); + + std::unique_ptr<ImportMapTy> globalsToInline = llvm_build_inline_plan(mod); + if (!globalsToInline) + return; + llvm_execute_inline_plan(mod, globalsToInline.get()); +} + +/* + * Build information necessary for inlining external function references in + * mod. + */ +static std::unique_ptr<ImportMapTy> +llvm_build_inline_plan(llvm::Module *mod) +{ + std::unique_ptr<ImportMapTy> globalsToInline(new ImportMapTy()); + FunctionInlineStates functionStates; + InlineWorkList worklist; + + InlineSearchPath defaultSearchPath; + + /* attempt to add module to search path */ + add_module_to_inline_search_path(defaultSearchPath, "$libdir/postgres"); + /* if postgres isn't available, no point continuing */ + if (defaultSearchPath.empty()) + return nullptr; + + /* + * Start inlining with current references to external functions by putting + * them on the inlining worklist. If, during inlining of those, new extern + * functions need to be inlined, they'll also be put there, with a lower + * priority. 
+ */ + for (const llvm::Function &funcDecl : mod->functions()) + { + InlineWorkListItem item = {}; + FunctionInlineState inlineState = {}; + + /* already has a definition */ + if (!funcDecl.isDeclaration()) + continue; + + /* llvm provides implementation */ + if (funcDecl.isIntrinsic()) + continue; + + item.symbolName = funcDecl.getName(); + item.searchpath = defaultSearchPath; + worklist.push_back(item); + inlineState.costLimit = inline_initial_cost; + inlineState.processed = false; + inlineState.inlined = false; + inlineState.allowReconsidering = false; + functionStates[funcDecl.getName()] = inlineState; + } + + /* + * Iterate over pending worklist items, look them up in index, check + * whether they should be inlined. + */ + while (!worklist.empty()) + { + InlineWorkListItem item = worklist.pop_back_val(); + llvm::StringRef symbolName = item.symbolName; + char *cmodname; + char *cfuncname; + FunctionInlineState &inlineState = functionStates[symbolName]; + llvm::GlobalValue::GUID funcGUID; + + llvm_split_symbol_name(symbolName.data(), &cmodname, &cfuncname); + + funcGUID = llvm::GlobalValue::getGUID(cfuncname); + + /* already processed */ + if (inlineState.processed) + continue; + + + if (cmodname) + add_module_to_inline_search_path(item.searchpath, cmodname); + + /* + * Iterate over all known definitions of function, via the index. Then + * look up module(s), check if function actually is defined (there + * could be hash conflicts). 
+ */ + for (const auto &gvs : summaries_for_guid(item.searchpath, funcGUID)) + { + const llvm::FunctionSummary *fs; + llvm::StringRef modPath = gvs->modulePath(); + llvm::Module *defMod; + llvm::Function *funcDef; + + fs = llvm::cast<llvm::FunctionSummary>(gvs); + +#if LLVM_VERSION_MAJOR > 3 + if (gvs->notEligibleToImport()) + { + ilog(DEBUG1, "ineligibile to import %s due to summary", + symbolName.data()); + continue; + } +#endif + + if ((int) fs->instCount() > inlineState.costLimit) + { + ilog(DEBUG1, "ineligibile to import %s due to early threshold: %u vs %u", + symbolName.data(), fs->instCount(), inlineState.costLimit); + inlineState.allowReconsidering = true; + continue; + } + + defMod = load_module_cached(modPath); + if (defMod->materializeMetadata()) + elog(FATAL, "failed to materialize metadata"); + + funcDef = defMod->getFunction(cfuncname); + + /* + * This can happen e.g. in case of a hash collision of the + * function's name. + */ + if (!funcDef) + continue; + + if (funcDef->materialize()) + elog(FATAL, "failed to materialize metadata"); + + Assert(!funcDef->isDeclaration()); + Assert(funcDef->hasExternalLinkage()); + + llvm::StringSet<> importVars; + llvm::SmallPtrSet<const llvm::Function *, 8> visitedFunctions; + int running_instcount = 0; + + /* + * Check whether function, and objects it depends on, are + * inlinable. + */ + if (function_inlinable(*funcDef, + inlineState.costLimit, + functionStates, + worklist, + item.searchpath, + visitedFunctions, + running_instcount, + importVars)) + { + /* + * Check whether function and all its dependencies are too + * big. Dependencies already counted for other functions that + * will get inlined are not counted again. While this make + * things somewhat order dependent, I can't quite see a point + * in a different behaviour. 
+ */ + if (running_instcount > inlineState.costLimit) + { + ilog(DEBUG1, "skipping inlining of %s due to late threshold %d vs %d", + symbolName.data(), running_instcount, inlineState.costLimit); + inlineState.allowReconsidering = true; + continue; + } + + ilog(DEBUG1, "inline top function %s total_instcount: %d, partial: %d", + symbolName.data(), running_instcount, fs->instCount()); + + /* import referenced function itself */ + importVars.insert(symbolName); + + { + llvm::StringSet<> &modGlobalsToInline = (*globalsToInline)[modPath]; + for (auto& importVar : importVars) + modGlobalsToInline.insert(importVar.first()); + Assert(modGlobalsToInline.size() > 0); + } + + /* mark function as inlined */ + inlineState.inlined = true; + + /* + * Found definition to inline, don't look for further + * potential definitions. + */ + break; + } + else + { + ilog(DEBUG1, "had to skip inlining %s", + symbolName.data()); + + /* It's possible there's another definition that's inlinable. */ + } + } + + /* + * Signal that we're done with symbol, whether successful (inlined = + * true above) or not. + */ + inlineState.processed = true; + } + + return globalsToInline; +} + +/* + * Perform the actual inlining of external functions (and their dependencies) + * into mod. 
+ */ +static void +llvm_execute_inline_plan(llvm::Module *mod, ImportMapTy *globalsToInline) +{ + llvm::IRMover Mover(*mod); + + for (const auto& toInline : *globalsToInline) + { + const llvm::StringRef& modPath = toInline.first(); + const llvm::StringSet<>& modGlobalsToInline = toInline.second; + llvm::SetVector<llvm::GlobalValue *> GlobalsToImport; + /* + * Take ownership of the cached module and remove its cache slot. The + * module is either handed to Mover.move() below or, if nothing is to + * be imported from it, destroyed when importMod goes out of scope. + */ + Assert(module_cache->count(modPath)); + std::unique_ptr<llvm::Module> importMod(std::move((*module_cache)[modPath])); + module_cache->erase(modPath); + + if (modGlobalsToInline.empty()) + continue; + + for (auto &glob: modGlobalsToInline) + { + llvm::StringRef SymbolName = glob.first(); + char *modname; + char *funcname; + /* + * funcname is what gets looked up in the module; a non-NULL modname + * triggers creation of the redirection wrapper further down. + */ + llvm_split_symbol_name(SymbolName.data(), &modname, &funcname); + + llvm::GlobalValue *valueToImport = importMod->getNamedValue(funcname); + + if (!valueToImport) + elog(FATAL, "didn't refind value %s to import", SymbolName.data()); + + /* + * For functions (global vars are only inlined if already static), + * mark imported variables as being clones from other + * functions. That a) avoids symbol conflicts b) allows the + * optimizer to perform inlining. + */ + if (llvm::isa<llvm::Function>(valueToImport)) + { + llvm::Function *F = llvm::dyn_cast<llvm::Function>(valueToImport); + typedef llvm::GlobalValue::LinkageTypes LinkageTypes; + + /* + * Per-function info isn't necessarily stripped yet, as the + * module is lazy-loaded when stripped above. + */ + llvm::stripDebugInfo(*F); + + /* + * If the to-be-imported function is one referenced including + * its module name, create a tiny inline function that just + * forwards the call. One might think a GlobalAlias would do + * the trick, but a) IRMover doesn't override a declaration + * with an alias pointing to a definition (instead renaming + * it), b) Aliases can't be AvailableExternally.
+ */ + if (modname) + { + llvm::Function *AF; + + AF = create_redirection_function(importMod, F, SymbolName); + + GlobalsToImport.insert(AF); + llvm::stripDebugInfo(*AF); + } + /* downgrade externally visible functions to available_externally linkage */ + if (valueToImport->hasExternalLinkage()) + { + valueToImport->setLinkage(LinkageTypes::AvailableExternallyLinkage); + } + } + + GlobalsToImport.insert(valueToImport); + ilog(DEBUG1, "performing import of %s %s", + modPath.data(), SymbolName.data()); + + } + /* IRMover::move() takes different trailing arguments depending on the LLVM major version */ +#if LLVM_VERSION_MAJOR > 4 +#define IRMOVE_PARAMS , /*IsPerformingImport=*/false +#elif LLVM_VERSION_MAJOR > 3 +#define IRMOVE_PARAMS , /*LinkModuleInlineAsm=*/false, /*IsPerformingImport=*/false +#else +#define IRMOVE_PARAMS +#endif + if (Mover.move(std::move(importMod), GlobalsToImport.getArrayRef(), + [](llvm::GlobalValue &, llvm::IRMover::ValueAdder) {} + IRMOVE_PARAMS)) + elog(FATAL, "function import failed with linker error"); + } +} + +/* + * Return a module identified by modPath, caching it in memory. + * + * Note that such a module may *not* be modified without copying, otherwise + * the cache state would get corrupted. + * + * On a cache miss the module is loaded with load_module() and stored in + * module_cache; the returned pointer remains owned by the cache entry. + */ +static llvm::Module* +load_module_cached(llvm::StringRef modPath) +{ + auto it = module_cache->find(modPath); + if (it == module_cache->end()) + { + it = module_cache->insert( + std::make_pair(modPath, load_module(modPath))).first; + } + + return it->second.get(); +} +/* + * Load the bitcode file <pkglib_path>/bitcode/<Identifier> into the global + * LLVM context and strip its debug info. A file that cannot be read or + * parsed is a FATAL error. + */ +static std::unique_ptr<llvm::Module> +load_module(llvm::StringRef Identifier) +{ + LLVMMemoryBufferRef buf; + LLVMModuleRef mod; + char path[MAXPGPATH]; + char *msg; + + snprintf(path, MAXPGPATH,"%s/bitcode/%s", pkglib_path, Identifier.data()); + + if (LLVMCreateMemoryBufferWithContentsOfFile(path, &buf, &msg)) + elog(FATAL, "failed to open bitcode file \"%s\": %s", + path, msg); + if (LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), buf, &mod)) + elog(FATAL, "failed to parse bitcode in file \"%s\"", path); + + /* + * Currently there's no use in more detailed debug info for JITed + * code.
Until that changes, not much point in wasting memory and cycles + * on processing debuginfo. + */ + llvm::StripDebugInfo(*llvm::unwrap(mod)); + + return std::unique_ptr<llvm::Module>(llvm::unwrap(mod)); +} + +/* + * Compute list of referenced variables, functions and the instruction count + * for a function. + * + * running_instcount is incremented once for every instruction that is not a + * debug intrinsic. Operands that are themselves Users are followed + * transitively (block addresses excepted), as are the initializers of + * referenced global variables. Referenced functions are recorded but not + * descended into. + */ +static void +function_references(llvm::Function &F, + int &running_instcount, + llvm::SmallPtrSet<llvm::GlobalVariable *, 8> &referencedVars, + llvm::SmallPtrSet<llvm::Function *, 8> &referencedFunctions) +{ + llvm::SmallPtrSet<const llvm::User *, 32> Visited; + + for (llvm::BasicBlock &BB : F) + { + for (llvm::Instruction &I : BB) + { + if (llvm::isa<llvm::DbgInfoIntrinsic>(I)) + continue; + + llvm::SmallVector<llvm::User *, 8> Worklist; + Worklist.push_back(&I); + + running_instcount++; + + while (!Worklist.empty()) { + llvm::User *U = Worklist.pop_back_val(); + + /* visited before */ + if (!Visited.insert(U).second) + continue; + + for (auto &OI : U->operands()) { + llvm::User *Operand = llvm::dyn_cast<llvm::User>(OI); + if (!Operand) + continue; + if (llvm::isa<llvm::BlockAddress>(Operand)) + continue; + if (auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(Operand)) { + referencedVars.insert(GV); + if (GV->hasInitializer()) + Worklist.push_back(GV->getInitializer()); + continue; + } + if (auto *CF = llvm::dyn_cast<llvm::Function>(Operand)) { + referencedFunctions.insert(CF); + continue; + } + Worklist.push_back(Operand); + } + } + } + } +} + +/* + * Check whether function F is inlinable and, if so, what globals need to be + * imported. + * + * References to external functions from, potentially recursively, inlined + * functions are added to the passed in worklist.
+ */ +static bool +function_inlinable(llvm::Function &F, + int threshold, + FunctionInlineStates &functionStates, + InlineWorkList &worklist, + InlineSearchPath &searchpath, + llvm::SmallPtrSet<const llvm::Function *, 8> &visitedFunctions, + int &running_instcount, + llvm::StringSet<> &importVars) +{ + int subThreshold = threshold * inline_cost_decay_factor; /* cost budget for functions referenced from F */ + llvm::SmallPtrSet<llvm::GlobalVariable *, 8> referencedVars; + llvm::SmallPtrSet<llvm::Function *, 8> referencedFunctions; + + /* can't rely on what may be inlined */ + if (F.isInterposable()) + return false; + + /* + * Can't rely on function being present. Alternatively we could create a + * static version of these functions? + */ + if (F.hasAvailableExternallyLinkage()) + return false; + + ilog(DEBUG1, "checking inlinability of %s", F.getName().data()); + + if (F.materialize()) + elog(FATAL, "failed to materialize metadata"); + /* before LLVM 14 the accessor used below is spelled hasFnAttribute */ +#if LLVM_VERSION_MAJOR < 14 +#define hasFnAttr hasFnAttribute +#endif + + if (F.getAttributes().hasFnAttr(llvm::Attribute::NoInline)) + { + ilog(DEBUG1, "ineligibile to import %s due to noinline", + F.getName().data()); + return false; + } + + function_references(F, running_instcount, referencedVars, referencedFunctions); + /* + * Check referenced variables. Externally visible ones are left alone, + * constant file-local ones are remembered in importVars, and anything + * else makes F non-inlinable. + */ + for (llvm::GlobalVariable* rv: referencedVars) + { + if (rv->materialize()) + elog(FATAL, "failed to materialize metadata"); + + /* + * Don't inline functions that access thread local variables. That + * doesn't work on current LLVM releases (but might in future). + */ + if (rv->isThreadLocal()) + { + ilog(DEBUG1, "cannot inline %s due to thread-local variable %s", + F.getName().data(), rv->getName().data()); + return false; + } + + /* + * Never want to inline externally visible vars, cheap enough to + * reference. + */ + if (rv->hasExternalLinkage() || rv->hasAvailableExternallyLinkage()) + continue; + + /* + * If variable is file-local, we need to inline it, to be able to + * inline the function itself.
Can't do that if the variable can be + * modified, because they'd obviously get out of sync. + * + * XXX: Currently not a problem, but there'd be problems with + * nontrivial initializers if they were allowed for postgres. + */ + if (!rv->isConstant()) + { + ilog(DEBUG1, "cannot inline %s due to uncloneable variable %s", + F.getName().data(), rv->getName().data()); + return false; + } + + ilog(DEBUG1, "memorizing global var %s linkage %d for inlining", + rv->getName().data(), (int)rv->getLinkage()); + + importVars.insert(rv->getName()); + /* small cost attributed to each cloned global */ + running_instcount += 5; + } + + visitedFunctions.insert(&F); /* so that references back to F are skipped in the loop below */ + + /* + * Check referenced functions. Check whether used static ones are + * inlinable, and remember external ones for inlining. + */ + for (llvm::Function* referencedFunction: referencedFunctions) + { + llvm::StringSet<> recImportVars; + + if (referencedFunction->materialize()) + elog(FATAL, "failed to materialize metadata"); + + if (referencedFunction->isIntrinsic()) + continue; + + /* if already visited skip, otherwise remember */ + if (!visitedFunctions.insert(referencedFunction).second) + continue; + + /* + * We don't inline external functions directly here, instead we put + * them on the worklist if appropriate and check them from + * llvm_build_inline_plan(). + */ + if (referencedFunction->hasExternalLinkage()) + { + llvm::StringRef funcName = referencedFunction->getName(); + + /* + * Don't bother checking for inlining if remaining cost budget is + * very small.
+ */ + if (subThreshold < 5) + continue; + + auto it = functionStates.find(funcName); + if (it == functionStates.end()) + { + FunctionInlineState inlineState; + + inlineState.costLimit = subThreshold; + inlineState.processed = false; + inlineState.inlined = false; + inlineState.allowReconsidering = false; + + functionStates[funcName] = inlineState; + worklist.push_back({funcName, searchpath}); + + ilog(DEBUG1, + "considering extern function %s at %d for inlining", + funcName.data(), subThreshold); + } + else if (!it->second.inlined && + (!it->second.processed || it->second.allowReconsidering) && + it->second.costLimit < subThreshold) + { + /* + * Update inlining threshold if higher. Need to re-queue + * to be processed if already processed with lower + * threshold. + */ + if (it->second.processed) + { + ilog(DEBUG1, + "reconsidering extern function %s at %d for inlining, increasing from %d", + funcName.data(), subThreshold, it->second.costLimit); + + it->second.processed = false; + it->second.allowReconsidering = false; + worklist.push_back({funcName, searchpath}); + } + it->second.costLimit = subThreshold; + } + continue; + } + + /* can't rely on what may be inlined */ + if (referencedFunction->isInterposable()) + return false; + /* + * Recurse into the non-external function with the reduced budget. Its + * imports are collected separately in recImportVars and only merged + * into importVars once the recursive check has succeeded. + */ + if (!function_inlinable(*referencedFunction, + subThreshold, + functionStates, + worklist, + searchpath, + visitedFunctions, + running_instcount, + recImportVars)) + { + ilog(DEBUG1, + "cannot inline %s due to required function %s not being inlinable", + F.getName().data(), referencedFunction->getName().data()); + return false; + } + + /* import referenced function itself */ + importVars.insert(referencedFunction->getName()); + + /* import referenced function and its dependants */ + for (auto& recImportVar : recImportVars) + importVars.insert(recImportVar.first()); + } + + return true; +} + +/* + * Attempt to load module summary located at path. Return empty pointer when + * loading fails.
+ */ +static std::unique_ptr<llvm::ModuleSummaryIndex> +llvm_load_summary(llvm::StringRef path) +{ + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer> > MBOrErr = + llvm::MemoryBuffer::getFile(path); + /* + * A file that cannot be read is only logged and results in nullptr; a + * file that can be read but not parsed as a summary is a FATAL error. + */ + if (std::error_code EC = MBOrErr.getError()) + { + ilog(DEBUG1, "failed to open %s: %s", path.data(), + EC.message().c_str()); + } + else + { + llvm::MemoryBufferRef ref(*MBOrErr.get().get()); + /* getModuleSummaryIndex() has a different signature and error type before LLVM 4 */ +#if LLVM_VERSION_MAJOR > 3 + llvm::Expected<std::unique_ptr<llvm::ModuleSummaryIndex> > IndexOrErr = + llvm::getModuleSummaryIndex(ref); + if (IndexOrErr) + return std::move(IndexOrErr.get()); + elog(FATAL, "failed to load summary \"%s\": %s", + path.data(), + toString(IndexOrErr.takeError()).c_str()); +#else + llvm::ErrorOr<std::unique_ptr<llvm::ModuleSummaryIndex> > IndexOrErr = + llvm::getModuleSummaryIndex(ref, [](const llvm::DiagnosticInfo &) {}); + if (IndexOrErr) + return std::move(IndexOrErr.get()); + elog(FATAL, "failed to load summary \"%s\": %s", + path.data(), + IndexOrErr.getError().message().c_str()); +#endif + } + return nullptr; +} + +/* + * Attempt to add modpath to the search path. + * + * Only module paths starting with $libdir/ are considered. The summary is + * looked up by replacing the $libdir prefix with <pkglib_path>/bitcode and + * appending .index.bc. The outcome of the load, including an empty pointer + * for a summary that could not be opened, is remembered in summary_cache, so + * each module path is loaded at most once. + */ +static void +add_module_to_inline_search_path(InlineSearchPath& searchpath, llvm::StringRef modpath) +{ + /* only extensions in $libdir are candidates for inlining for now */ + if (!modpath.startswith("$libdir/")) + return; + + /* if there's no match, attempt to load */ + auto it = summary_cache->find(modpath); + if (it == summary_cache->end()) + { + std::string path(modpath); + path = path.replace(0, strlen("$libdir"), std::string(pkglib_path) + "/bitcode"); + path += ".index.bc"; + (*summary_cache)[modpath] = llvm_load_summary(path); + it = summary_cache->find(modpath); + } + + Assert(it != summary_cache->end()); + + /* if the entry isn't NULL, it's validly loaded */ + if (it->second) + searchpath.push_back(it->second.get()); +} + +/* + * Search for all references for functions hashing to guid in the search path, + * and return them in search path order.
+ */ +static llvm::SmallVector<llvm::GlobalValueSummary *, 1> +summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid) +{ + llvm::SmallVector<llvm::GlobalValueSummary *, 1> matches; + /* collect matches from every index, using whichever lookup API this LLVM version provides */ + for (auto index : path) + { +#if LLVM_VERSION_MAJOR > 4 + llvm::ValueInfo funcVI = index->getValueInfo(guid); + + /* if index doesn't know function, we don't have a body, continue */ + if (funcVI) + for (auto &gv : funcVI.getSummaryList()) + matches.push_back(gv.get()); +#else + const llvm::const_gvsummary_iterator &I = + index->findGlobalValueSummaryList(guid); + if (I != index->end()) + { + for (auto &gv : I->second) + matches.push_back(gv.get()); + } +#endif + } + + return matches; +} + +/* + * Create inline wrapper with the name Name, redirecting the call to F. + * + * The wrapper is created inside importMod with the function type of F and + * available_externally linkage. Its body is a single call to F, marked + * alwaysinline, whose result is returned. + * + * NOTE(review): only the first argument of the wrapper is forwarded to F, + * so this presumably relies on every redirected function taking exactly one + * argument -- confirm against the callers. + */ +static llvm::Function* +create_redirection_function(std::unique_ptr<llvm::Module> &importMod, + llvm::Function *F, + llvm::StringRef Name) +{ + typedef llvm::GlobalValue::LinkageTypes LinkageTypes; + + llvm::LLVMContext &Context = F->getContext(); + llvm::IRBuilder<> Builder(Context); + llvm::Function *AF; + llvm::BasicBlock *BB; + llvm::CallInst *fwdcall; +#if LLVM_VERSION_MAJOR < 14 + llvm::Attribute inlineAttribute; +#endif + + AF = llvm::Function::Create(F->getFunctionType(), + LinkageTypes::AvailableExternallyLinkage, + Name, importMod.get()); + BB = llvm::BasicBlock::Create(Context, "entry", AF); + + Builder.SetInsertPoint(BB); + fwdcall = Builder.CreateCall(F, &*AF->arg_begin()); /* forwards the first argument only */ +#if LLVM_VERSION_MAJOR < 14 + inlineAttribute = llvm::Attribute::get(Context, + llvm::Attribute::AlwaysInline); + fwdcall->addAttribute(~0U, inlineAttribute); +#else + fwdcall->addFnAttr(llvm::Attribute::AlwaysInline); +#endif + Builder.CreateRet(fwdcall); + + return AF; +} diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c new file mode 100644 index 0000000..2deb65c --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -0,0 +1,139 @@
+/*------------------------------------------------------------------------- + * + * llvmjit_types.c + * List of types needed by JIT emitting code. + * + * JIT emitting code often needs to access struct elements, create functions + * with the correct signature etc. To allow synchronizing these types with a + * low chance of definitions getting out of sync, this file lists types and + * functions that directly need to be accessed from LLVM. + * + * When LLVM is first used in a backend, a bitcode version of this file will + * be loaded. The needed types and signatures will be stored into Struct*, + * Type*, Func* variables. + * + * NB: This file will not be linked into the server, it's just converted to + * bitcode. + * + * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_types.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup.h" +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "catalog/pg_attribute.h" +#include "executor/execExpr.h" +#include "executor/nodeAgg.h" +#include "executor/tuptable.h" +#include "fmgr.h" +#include "nodes/execnodes.h" +#include "nodes/memnodes.h" +#include "utils/expandeddatum.h" +#include "utils/palloc.h" + + +/* + * List of types needed for JITing. These have to be non-static, otherwise + * clang/LLVM will omit them. As this file will never be linked into + * anything, that's harmless. 
+ */ +PGFunction TypePGFunction; +size_t TypeSizeT; +bool TypeStorageBool; +ExprStateEvalFunc TypeExprStateEvalFunc; +ExecEvalSubroutine TypeExecEvalSubroutine; +ExecEvalBoolSubroutine TypeExecEvalBoolSubroutine; +/* struct types; note that FunctionCallInfoBaseData is exposed as StructFunctionCallInfoData */ +NullableDatum StructNullableDatum; +AggState StructAggState; +AggStatePerGroupData StructAggStatePerGroupData; +AggStatePerTransData StructAggStatePerTransData; +ExprContext StructExprContext; +ExprEvalStep StructExprEvalStep; +ExprState StructExprState; +FunctionCallInfoBaseData StructFunctionCallInfoData; +HeapTupleData StructHeapTupleData; +MemoryContextData StructMemoryContextData; +TupleTableSlot StructTupleTableSlot; +HeapTupleTableSlot StructHeapTupleTableSlot; +MinimalTupleTableSlot StructMinimalTupleTableSlot; +TupleDescData StructTupleDescData; + + +/* + * To determine which attributes functions need to have (depends e.g. on + * compiler version and settings) to be compatible for inlining, we simply + * copy the attributes of this function. + */ +extern Datum AttributeTemplate(PG_FUNCTION_ARGS); +Datum +AttributeTemplate(PG_FUNCTION_ARGS) +{ + PG_RETURN_NULL(); +} + +/* + * Clang represents stdbool.h style booleans that are returned by functions + * differently (as i1) than stored ones (as i8). Therefore we do not just need + * TypeStorageBool (above), but also a way to determine the width of a + * returned integer. This allows us to keep compatible with non-stdbool + * using architectures. + */ +extern bool FunctionReturningBool(void); +bool +FunctionReturningBool(void) +{ + return false; +} + +/* + * To force signatures of functions used during JITing to be present, + * reference the functions required. This again has to be non-static, to avoid + * being removed as unnecessary.
+ */ +void *referenced_functions[] = +{ + ExecAggInitGroup, + ExecAggTransReparent, + ExecEvalAggOrderedTransDatum, + ExecEvalAggOrderedTransTuple, + ExecEvalArrayCoerce, + ExecEvalArrayExpr, + ExecEvalConstraintCheck, + ExecEvalConstraintNotNull, + ExecEvalConvertRowtype, + ExecEvalCurrentOfExpr, + ExecEvalFieldSelect, + ExecEvalFieldStoreDeForm, + ExecEvalFieldStoreForm, + ExecEvalFuncExprFusage, + ExecEvalFuncExprStrictFusage, + ExecEvalGroupingFunc, + ExecEvalMinMax, + ExecEvalNextValueExpr, + ExecEvalParamExec, + ExecEvalParamExtern, + ExecEvalRow, + ExecEvalRowNotNull, + ExecEvalRowNull, + ExecEvalSQLValueFunction, + ExecEvalScalarArrayOp, + ExecEvalHashedScalarArrayOp, + ExecEvalSubPlan, + ExecEvalSysVar, + ExecEvalWholeRowVar, + ExecEvalXmlExpr, + MakeExpandedObjectReadOnlyInternal, + slot_getmissingattrs, + slot_getsomeattrs_int, + strlen, + varsize_any, +}; diff --git a/src/backend/jit/llvm/llvmjit_wrap.cpp b/src/backend/jit/llvm/llvmjit_wrap.cpp new file mode 100644 index 0000000..692483d --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_wrap.cpp @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_wrap.cpp + * Parts of the LLVM interface not (yet) exposed to C. + * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_wrap.cpp + * + *------------------------------------------------------------------------- + */ + +extern "C" +{ +#include "postgres.h" +} + +#include <llvm-c/Core.h> + +/* Avoid macro clash with LLVM's C++ headers */ +#undef Min + +#include <llvm/IR/Attributes.h> +#include <llvm/IR/Function.h> +#include <llvm/MC/SubtargetFeature.h> +#include <llvm/Support/Host.h> + +#include "jit/llvmjit.h" + + +/* + * C-API extensions.
+ */ +#if defined(HAVE_DECL_LLVMGETHOSTCPUNAME) && !HAVE_DECL_LLVMGETHOSTCPUNAME +char *LLVMGetHostCPUName(void) { + return strdup(llvm::sys::getHostCPUName().data()); /* result lives in strdup()ed memory */ +} +#endif + +/* + * Fallback definition, compiled only when HAVE_DECL_LLVMGETHOSTCPUFEATURES + * is defined but zero (presumably meaning LLVM itself does not declare the + * function -- confirm against configure). Every host feature reported by + * llvm::sys::getHostCPUFeatures() is added to the feature string; if that + * call fails no features are added. The result lives in strdup()ed memory. + */ +#if defined(HAVE_DECL_LLVMGETHOSTCPUFEATURES) && !HAVE_DECL_LLVMGETHOSTCPUFEATURES +char *LLVMGetHostCPUFeatures(void) { + llvm::SubtargetFeatures Features; + llvm::StringMap<bool> HostFeatures; + + if (llvm::sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + + return strdup(Features.getString().c_str()); +} +#endif + +/* + * Like LLVM's LLVMGetAttributeCountAtIndex(), but works around a bug in + * LLVM <= 3.9. + * + * In LLVM <= 3.9, LLVMGetAttributeCountAtIndex() segfaults if there are no + * attributes at an index (fixed in LLVM commit ce9bb1097dc2). + */ +unsigned +LLVMGetAttributeCountAtIndexPG(LLVMValueRef F, uint32 Idx) +{ + /* + * This is more expensive, so only do when using a problematic LLVM + * version. + */ +#if LLVM_VERSION_MAJOR < 4 + if (!llvm::unwrap<llvm::Function>(F)->getAttributes().hasAttributes(Idx)) + return 0; +#endif + + /* + * There is no nice public API to determine the count, so just always + * fall back to LLVM's C API. + */ + return LLVMGetAttributeCountAtIndex(F, Idx); +} |