Diffstat (limited to 'debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch')
-rw-r--r--  debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch  283
1 file changed, 283 insertions(+), 0 deletions(-)
diff --git a/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch b/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch
new file mode 100644
index 00000000..82f3b5b1
--- /dev/null
+++ b/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch
@@ -0,0 +1,283 @@
+Forwarded: https://github.com/MariaDB/server/pull/2541
+Origin: https://patch-diff.githubusercontent.com/raw/MariaDB/server/pull/2541.patch
+From: Hugo Wen <wenhug@amazon.com>
+Date: Sat, 11 Mar 2023 00:27:42 +0000
+Subject: [PATCH] Fix a stack overflow in pinbox allocator
+
+MariaDB supports a "wait-free concurrent allocator based on pinning addresses".
+In `lf_pinbox_real_free()` it sorts the pinned addresses so that a binary
+search can be used during the "real free" pass. `alloca()` was used to
+allocate the stack memory into which the addresses were copied.
+
+To prevent a stack overflow, the function checks that there is enough stack
+space before allocating. However, the available stack size was calculated
+inaccurately, which eventually caused the database to crash due to stack overflow.
+
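+In outline, the code being removed did roughly the following (a simplified
+sketch; the exact code is visible in the hunk below):
+
+    /* old approach (simplified): collect, sort, then binary-search */
+    int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
+    if (available_stack_size(&pinbox, stack_ends_here) >
+        alloca_size + ALLOCA_SAFETY_MARGIN)
+    {
+      void **addr= (void **) alloca(alloca_size);     /* on the stack */
+      /* harvest all pinned addresses of all threads into addr[] ... */
+      qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
+      /* ... then binary-search addr[] for every object in the purgatory */
+    }
+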
+The crash was seen on MariaDB 10.6.11, but the same code defect exists in all
+MariaDB versions.
+
+A similar issue happened previously, and the fix in fc2c1e43 was to add an
+`ALLOCA_SAFETY_MARGIN` of 8192 bytes. However, that safety margin is not
+enough under high-connection workloads.
+
+MySQL had a similar issue, and the fix
+https://github.com/mysql/mysql-server/commit/b086fda was to remove the use of
+`alloca()` and replace the qsort approach with a linear scan through all
+pointers (pins) owned by each thread.
+
+This commit takes mostly the same approach (sketched below), as it is the only
+practical way to solve this issue:
+1. Frame sizes on different architectures can differ.
+2. The number of active (non-null) pinned addresses varies, so the frame
+   size for the recursive sorting function `msort_with_tmp` is also hard
+   to predict.
+3. Allocating big memory blocks on the stack is not good practice.
+
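+In outline, the new `lf_pinbox_real_free()` works as follows (a simplified
+sketch; the full code is in the hunk below):
+
+    /* new approach (simplified): no alloca(), no qsort() */
+    /* 1. detach the current purgatory list and start a fresh one */
+    struct st_match_and_save_arg arg= {pins, pinbox, pins->purgatory};
+    pins->purgatory= NULL;
+    pins->purgatory_count= 0;
+    /* 2. for every active pin of every thread, linearly scan the detached
+          list; objects that are still pinned are moved back to the new
+          purgatory by match_and_save() */
+    lf_dynarray_iterate(&pinbox->pinarray,
+                        (lf_dynarray_func)match_and_save, &arg);
+    /* 3. whatever remains on the detached list is pinned by no one and
+          can be freed in one batch */
+    if (arg.old_purgatory)
+    {
+      void *last= arg.old_purgatory;
+      while (pnext_node(pinbox, last))          /* find the list tail */
+        last= pnext_node(pinbox, last);
+      pinbox->free_func(arg.old_purgatory, last, pinbox->free_func_arg);
+    }
+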
+For further details, see the MySQL commit mentioned above and the inline comments.
+
+All new code of the whole pull request, including one or several files
+that are either new files or modified ones, are contributed under the
+BSD-new license. I am contributing on behalf of my employer Amazon Web
+Services, Inc.
+---
+ mysys/lf_alloc-pin.c | 180 ++++++++++++++-----------------------------
+ 1 file changed, 59 insertions(+), 121 deletions(-)
+
+--- a/mysys/lf_alloc-pin.c
++++ b/mysys/lf_alloc-pin.c
+@@ -103,12 +103,6 @@
+ #include <lf.h>
+ #include "my_cpu.h"
+
+-/*
+- when using alloca() leave at least that many bytes of the stack -
+- for functions we might be calling from within this stack frame
+-*/
+-#define ALLOCA_SAFETY_MARGIN 8192
+-
+ #define LF_PINBOX_MAX_PINS 65536
+
+ static void lf_pinbox_real_free(LF_PINS *pins);
+@@ -239,24 +233,21 @@ void lf_pinbox_put_pins(LF_PINS *pins)
+ } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
+ (int32*) &top_ver,
+ top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
+- return;
+ }
+
+-static int ptr_cmp(void **a, void **b)
++/*
++ Get the next pointer in the purgatory list.
++ Note that next_node is not used to avoid the extra volatile.
++*/
++#define pnext_node(P, X) (*((void **)(((char *)(X)) + (P)->free_ptr_offset)))
++
++static inline void add_to_purgatory(LF_PINS *pins, void *addr)
+ {
+- return *a < *b ? -1 : *a == *b ? 0 : 1;
++ pnext_node(pins->pinbox, addr)= pins->purgatory;
++ pins->purgatory= addr;
++ pins->purgatory_count++;
+ }
+
+-#define add_to_purgatory(PINS, ADDR) \
+- do \
+- { \
+- my_atomic_storeptr_explicit( \
+- (void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset), \
+- (PINS)->purgatory, MY_MEMORY_ORDER_RELEASE); \
+- (PINS)->purgatory= (ADDR); \
+- (PINS)->purgatory_count++; \
+- } while (0)
+-
+ /*
+ Free an object allocated via pinbox allocator
+
+@@ -274,138 +265,85 @@ void lf_pinbox_free(LF_PINS *pins, void
+ lf_pinbox_real_free(pins););
+ }
+
+-struct st_harvester {
+- void **granary;
+- int npins;
++struct st_match_and_save_arg {
++ LF_PINS *pins;
++ LF_PINBOX *pinbox;
++ void *old_purgatory;
+ };
+
+ /*
+- callback forlf_dynarray_iterate:
+- scan all pins of all threads and accumulate all pins
++ Callback for lf_dynarray_iterate:
++ Scan all pins of all threads, for each active (non-null) pin,
++ scan the current thread's purgatory. If present there, move it
++ to a new purgatory. At the end, the old purgatory will contain
++ pointers not pinned by any thread.
+ */
+-static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
++static int match_and_save(LF_PINS *el, struct st_match_and_save_arg *arg)
+ {
+ int i;
+- LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
++ LF_PINS *el_end= el + LF_DYNARRAY_LEVEL_LENGTH;
+ for (; el < el_end; el++)
+ {
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ {
+ void *p= el->pin[i];
+ if (p)
+- *hv->granary++= p;
++ {
++ void *cur= arg->old_purgatory;
++ void **list_prev= &arg->old_purgatory;
++ while (cur)
++ {
++ void *next= pnext_node(arg->pinbox, cur);
++
++ if (p == cur)
++ {
++ /* pinned - keeping */
++ add_to_purgatory(arg->pins, cur);
++ /* unlink from old purgatory */
++ *list_prev= next;
++ }
++ else
++ list_prev= (void **)((char *)cur+arg->pinbox->free_ptr_offset);
++ cur= next;
++ }
++ if (!arg->old_purgatory)
++ return 1;
++ }
+ }
+ }
+- /*
+- hv->npins may become negative below, but it means that
+- we're on the last dynarray page and harvest_pins() won't be
+- called again. We don't bother to make hv->npins() correct
+- (that is 0) in this case.
+- */
+- hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
+ return 0;
+ }
+
+ /*
+- callback forlf_dynarray_iterate:
+- scan all pins of all threads and see if addr is present there
+-*/
+-static int match_pins(LF_PINS *el, void *addr)
+-{
+- int i;
+- LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
+- for (; el < el_end; el++)
+- for (i= 0; i < LF_PINBOX_PINS; i++)
+- if (el->pin[i] == addr)
+- return 1;
+- return 0;
+-}
+-
+-#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
+-#define anext_node(X) next_node(&allocator->pinbox, (X))
+-
+-/*
+ Scan the purgatory and free everything that can be freed
+ */
+ static void lf_pinbox_real_free(LF_PINS *pins)
+ {
+- int npins;
+- void *list;
+- void **addr= NULL;
+- void *first= NULL, *last= NULL;
+- struct st_my_thread_var *var= my_thread_var;
+- void *stack_ends_here= var ? var->stack_ends_here : NULL;
+ LF_PINBOX *pinbox= pins->pinbox;
+
+- npins= pinbox->pins_in_array+1;
++ /* Store info about current purgatory. */
++ struct st_match_and_save_arg arg = {pins, pinbox, pins->purgatory};
++ /* Reset purgatory. */
++ pins->purgatory= NULL;
++ pins->purgatory_count= 0;
+
+-#ifdef HAVE_ALLOCA
+- if (stack_ends_here != NULL)
+- {
+- int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
+- /* create a sorted list of pinned addresses, to speed up searches */
+- if (available_stack_size(&pinbox, stack_ends_here) >
+- alloca_size + ALLOCA_SAFETY_MARGIN)
+- {
+- struct st_harvester hv;
+- addr= (void **) alloca(alloca_size);
+- hv.granary= addr;
+- hv.npins= npins;
+- /* scan the dynarray and accumulate all pinned addresses */
+- lf_dynarray_iterate(&pinbox->pinarray,
+- (lf_dynarray_func)harvest_pins, &hv);
+-
+- npins= (int)(hv.granary-addr);
+- /* and sort them */
+- if (npins)
+- qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
+- }
+- }
+-#endif
+
+- list= pins->purgatory;
+- pins->purgatory= 0;
+- pins->purgatory_count= 0;
+- while (list)
++ lf_dynarray_iterate(&pinbox->pinarray,
++ (lf_dynarray_func)match_and_save, &arg);
++
++ if (arg.old_purgatory)
+ {
+- void *cur= list;
+- list= *(void **)((char *)cur+pinbox->free_ptr_offset);
+- if (npins)
+- {
+- if (addr) /* use binary search */
+- {
+- void **a, **b, **c;
+- for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
+- if (cur == *c)
+- a= b= c;
+- else if (cur > *c)
+- a= c;
+- else
+- b= c;
+- if (cur == *a || cur == *b)
+- goto found;
+- }
+- else /* no alloca - no cookie. linear search here */
+- {
+- if (lf_dynarray_iterate(&pinbox->pinarray,
+- (lf_dynarray_func)match_pins, cur))
+- goto found;
+- }
+- }
+- /* not pinned - freeing */
+- if (last)
+- last= next_node(pinbox, last)= (uchar *)cur;
+- else
+- first= last= (uchar *)cur;
+- continue;
+-found:
+- /* pinned - keeping */
+- add_to_purgatory(pins, cur);
++ /* Some objects in the old purgatory were not pinned, free them. */
++ void *last= arg.old_purgatory;
++ while (pnext_node(pinbox, last))
++ last= pnext_node(pinbox, last);
++ pinbox->free_func(arg.old_purgatory, last, pinbox->free_func_arg);
+ }
+- if (last)
+- pinbox->free_func(first, last, pinbox->free_func_arg);
+ }
+
++#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
++#define anext_node(X) next_node(&allocator->pinbox, (X))
++
+ /* lock-free memory allocator for fixed-size objects */
+
+ /*