diff options
Diffstat (limited to 'debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch')
-rw-r--r-- | debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch b/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch new file mode 100644 index 00000000..82f3b5b1 --- /dev/null +++ b/debian/patches/2541-fix-stack-overflow-in-pinbox-allocator.patch @@ -0,0 +1,283 @@ +Forwarded: https://github.com/MariaDB/server/pull/2541 +Origin: https://patch-diff.githubusercontent.com/raw/MariaDB/server/pull/2541.patch +From: Hugo Wen <wenhug@amazon.com> +Date: Sat, 11 Mar 2023 00:27:42 +0000 +Subject: [PATCH] Fix a stack overflow in pinbox allocator + +MariaDB supports a "wait-free concurrent allocator based on pinning addresses". +In `lf_pinbox_real_free()` it tries to sort the pinned addresses for better +performance to use binary search during "real free". `alloca()` was used to +allocate stack memory and copy addresses. + +To prevent a stack overflow when allocating the stack memory the function checks +if there's enough stack space. However, the available stack size was calculated +inaccurately which eventually caused database crash due to stack overflow. + +The crash was seen on MariaDB 10.6.11 but the same code defect exists on all +MariaDB versions. + +A similar issue happened previously and the fix in fc2c1e43 was to add a +`ALLOCA_SAFETY_MARGIN` which is 8192 bytes. However, that safety margin is not +enough during high connection workloads. + +MySQL also had a similar issue and the fix +https://github.com/mysql/mysql-server/commit/b086fda was to remove the use of +`alloca` and replace qsort approach by a linear scan through all pointers (pins) +owned by each thread. + +This commit is mostly the same as it is the only way to solve this issue as: +1. Frame sizes in different architecture can be different. +2. Number of active (non-null) pinned addresses varies, so the frame + size for the recursive sorting function `msort_with_tmp` is also hard + to predict. +3. Allocating big memory blocks in stack doesn't seem to be a very good + practice. + +For further details see the mentioned commit in MySQL and the inline comments. + +All new code of the whole pull request, including one or several files +that are either new files or modified ones, are contributed under the +BSD-new license. I am contributing on behalf of my employer Amazon Web +Services, Inc. +--- + mysys/lf_alloc-pin.c | 180 ++++++++++++++----------------------------- + 1 file changed, 59 insertions(+), 121 deletions(-) + +--- a/mysys/lf_alloc-pin.c ++++ b/mysys/lf_alloc-pin.c +@@ -103,12 +103,6 @@ + #include <lf.h> + #include "my_cpu.h" + +-/* +- when using alloca() leave at least that many bytes of the stack - +- for functions we might be calling from within this stack frame +-*/ +-#define ALLOCA_SAFETY_MARGIN 8192 +- + #define LF_PINBOX_MAX_PINS 65536 + + static void lf_pinbox_real_free(LF_PINS *pins); +@@ -239,24 +233,21 @@ void lf_pinbox_put_pins(LF_PINS *pins) + } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver, + (int32*) &top_ver, + top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); +- return; + } + +-static int ptr_cmp(void **a, void **b) ++/* ++ Get the next pointer in the purgatory list. ++ Note that next_node is not used to avoid the extra volatile. ++*/ ++#define pnext_node(P, X) (*((void **)(((char *)(X)) + (P)->free_ptr_offset))) ++ ++static inline void add_to_purgatory(LF_PINS *pins, void *addr) + { +- return *a < *b ? -1 : *a == *b ? 0 : 1; ++ pnext_node(pins->pinbox, addr)= pins->purgatory; ++ pins->purgatory= addr; ++ pins->purgatory_count++; + } + +-#define add_to_purgatory(PINS, ADDR) \ +- do \ +- { \ +- my_atomic_storeptr_explicit( \ +- (void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset), \ +- (PINS)->purgatory, MY_MEMORY_ORDER_RELEASE); \ +- (PINS)->purgatory= (ADDR); \ +- (PINS)->purgatory_count++; \ +- } while (0) +- + /* + Free an object allocated via pinbox allocator + +@@ -274,138 +265,85 @@ void lf_pinbox_free(LF_PINS *pins, void + lf_pinbox_real_free(pins);); + } + +-struct st_harvester { +- void **granary; +- int npins; ++struct st_match_and_save_arg { ++ LF_PINS *pins; ++ LF_PINBOX *pinbox; ++ void *old_purgatory; + }; + + /* +- callback forlf_dynarray_iterate: +- scan all pins of all threads and accumulate all pins ++ Callback for lf_dynarray_iterate: ++ Scan all pins of all threads, for each active (non-null) pin, ++ scan the current thread's purgatory. If present there, move it ++ to a new purgatory. At the end, the old purgatory will contain ++ pointers not pinned by any thread. + */ +-static int harvest_pins(LF_PINS *el, struct st_harvester *hv) ++static int match_and_save(LF_PINS *el, struct st_match_and_save_arg *arg) + { + int i; +- LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); ++ LF_PINS *el_end= el + LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) +- *hv->granary++= p; ++ { ++ void *cur= arg->old_purgatory; ++ void **list_prev= &arg->old_purgatory; ++ while (cur) ++ { ++ void *next= pnext_node(arg->pinbox, cur); ++ ++ if (p == cur) ++ { ++ /* pinned - keeping */ ++ add_to_purgatory(arg->pins, cur); ++ /* unlink from old purgatory */ ++ *list_prev= next; ++ } ++ else ++ list_prev= (void **)((char *)cur+arg->pinbox->free_ptr_offset); ++ cur= next; ++ } ++ if (!arg->old_purgatory) ++ return 1; ++ } + } + } +- /* +- hv->npins may become negative below, but it means that +- we're on the last dynarray page and harvest_pins() won't be +- called again. We don't bother to make hv->npins() correct +- (that is 0) in this case. +- */ +- hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; + } + + /* +- callback forlf_dynarray_iterate: +- scan all pins of all threads and see if addr is present there +-*/ +-static int match_pins(LF_PINS *el, void *addr) +-{ +- int i; +- LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; +- for (; el < el_end; el++) +- for (i= 0; i < LF_PINBOX_PINS; i++) +- if (el->pin[i] == addr) +- return 1; +- return 0; +-} +- +-#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset))) +-#define anext_node(X) next_node(&allocator->pinbox, (X)) +- +-/* + Scan the purgatory and free everything that can be freed + */ + static void lf_pinbox_real_free(LF_PINS *pins) + { +- int npins; +- void *list; +- void **addr= NULL; +- void *first= NULL, *last= NULL; +- struct st_my_thread_var *var= my_thread_var; +- void *stack_ends_here= var ? var->stack_ends_here : NULL; + LF_PINBOX *pinbox= pins->pinbox; + +- npins= pinbox->pins_in_array+1; ++ /* Store info about current purgatory. */ ++ struct st_match_and_save_arg arg = {pins, pinbox, pins->purgatory}; ++ /* Reset purgatory. */ ++ pins->purgatory= NULL; ++ pins->purgatory_count= 0; + +-#ifdef HAVE_ALLOCA +- if (stack_ends_here != NULL) +- { +- int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins; +- /* create a sorted list of pinned addresses, to speed up searches */ +- if (available_stack_size(&pinbox, stack_ends_here) > +- alloca_size + ALLOCA_SAFETY_MARGIN) +- { +- struct st_harvester hv; +- addr= (void **) alloca(alloca_size); +- hv.granary= addr; +- hv.npins= npins; +- /* scan the dynarray and accumulate all pinned addresses */ +- lf_dynarray_iterate(&pinbox->pinarray, +- (lf_dynarray_func)harvest_pins, &hv); +- +- npins= (int)(hv.granary-addr); +- /* and sort them */ +- if (npins) +- qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); +- } +- } +-#endif + +- list= pins->purgatory; +- pins->purgatory= 0; +- pins->purgatory_count= 0; +- while (list) ++ lf_dynarray_iterate(&pinbox->pinarray, ++ (lf_dynarray_func)match_and_save, &arg); ++ ++ if (arg.old_purgatory) + { +- void *cur= list; +- list= *(void **)((char *)cur+pinbox->free_ptr_offset); +- if (npins) +- { +- if (addr) /* use binary search */ +- { +- void **a, **b, **c; +- for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2) +- if (cur == *c) +- a= b= c; +- else if (cur > *c) +- a= c; +- else +- b= c; +- if (cur == *a || cur == *b) +- goto found; +- } +- else /* no alloca - no cookie. linear search here */ +- { +- if (lf_dynarray_iterate(&pinbox->pinarray, +- (lf_dynarray_func)match_pins, cur)) +- goto found; +- } +- } +- /* not pinned - freeing */ +- if (last) +- last= next_node(pinbox, last)= (uchar *)cur; +- else +- first= last= (uchar *)cur; +- continue; +-found: +- /* pinned - keeping */ +- add_to_purgatory(pins, cur); ++ /* Some objects in the old purgatory were not pinned, free them. */ ++ void *last= arg.old_purgatory; ++ while (pnext_node(pinbox, last)) ++ last= pnext_node(pinbox, last); ++ pinbox->free_func(arg.old_purgatory, last, pinbox->free_func_arg); + } +- if (last) +- pinbox->free_func(first, last, pinbox->free_func_arg); + } + ++#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset))) ++#define anext_node(X) next_node(&allocator->pinbox, (X)) ++ + /* lock-free memory allocator for fixed-size objects */ + + /* |