Origin: https://github.com/git/git/commit/a244dc5b0a629290881641467c7a545de7508ab2 Origin: https://github.com/git/git/commit/81dc898df9b4b4035534a927f3234a3839b698bf Origin: https://github.com/git/git/commit/b49f309aa16febeddb65e82526640a91bbba3be3 Origin: https://github.com/git/git/commit/f6e0b9f38987ad5e47bab551f8760b70689a5905 Origin: https://github.com/git/git/commit/1de69c0cdd388b0a5b7bdde0bfa0bda514a354b0 Origin: https://github.com/git/git/commit/48050c42c73c28b0c001d63d11dffac7e116847b Origin: https://github.com/git/git/commit/522cc87fdc25449222a5894a428eebf4b8d5eaa9 Origin: https://github.com/git/git/commit/17d23e8a3812a5ca3dd6564e74d5250f22e5d76d Origin: https://github.com/git/git/commit/937b71cc8b5b998963a7f9a33312ba3549d55510 Origin: https://github.com/git/git/commit/81c2d4c3a5ba0e6ab8c348708441fed170e63a82 Origin: https://github.com/git/git/commit/f930a2394303b902e2973f4308f96529f736b8bc Origin: https://github.com/git/git/commit/304a50adff6480ede46b68f7545baab542cbfb46 Reviewed-by: Aron Xu Last-Updated: 2023-01-26 diff --git a/column.c b/column.c index 1261e18..fbf8863 100644 --- a/column.c +++ b/column.c @@ -23,7 +23,7 @@ struct column_data { /* return length of 's' in letters, ANSI escapes stripped */ static int item_length(const char *s) { - return utf8_strnwidth(s, -1, 1); + return utf8_strnwidth(s, strlen(s), 1); } /* diff --git a/git-compat-util.h b/git-compat-util.h index f505f81..0ac1b7f 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -918,6 +918,14 @@ static inline size_t st_sub(size_t a, size_t b) return a - b; } +static inline int cast_size_t_to_int(size_t a) +{ + if (a > INT_MAX) + die("number too large to represent as int on this platform: %"PRIuMAX, + (uintmax_t)a); + return (int)a; +} + #ifdef HAVE_ALLOCA_H # include # define xalloca(size) (alloca(size)) diff --git a/pretty.c b/pretty.c index 7a7708a..e228557 100644 --- a/pretty.c +++ b/pretty.c @@ -13,6 +13,13 @@ #include "gpg-interface.h" #include "trailer.h" +/* + * The limit for formatting directives, which enable the caller to append + * arbitrarily many bytes to the formatted buffer. This includes padding + * and wrapping formatters. + */ +#define FORMATTING_LIMIT (16 * 1024) + static char *user_format; static struct cmt_fmt_map { const char *name; @@ -915,7 +922,9 @@ static void strbuf_wrap(struct strbuf *sb, size_t pos, if (pos) strbuf_add(&tmp, sb->buf, pos); strbuf_add_wrapped_text(&tmp, sb->buf + pos, - (int) indent1, (int) indent2, (int) width); + cast_size_t_to_int(indent1), + cast_size_t_to_int(indent2), + cast_size_t_to_int(width)); strbuf_swap(&tmp, sb); strbuf_release(&tmp); } @@ -1041,9 +1050,18 @@ static size_t parse_padding_placeholder(const char *placeholder, const char *end = start + strcspn(start, ",)"); char *next; int width; - if (!end || end == start) + if (!*end || end == start) return 0; width = strtol(start, &next, 10); + + /* + * We need to limit the amount of padding, or otherwise this + * would allow the user to pad the buffer by arbitrarily many + * bytes and thus cause resource exhaustion. + */ + if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT) + return 0; + if (next == start || width == 0) return 0; if (width < 0) { @@ -1203,6 +1221,16 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ if (*next != ')') return 0; } + + /* + * We need to limit the format here as it allows the + * user to prepend arbitrarily many bytes to the buffer + * when rewrapping. + */ + if (width > FORMATTING_LIMIT || + indent1 > FORMATTING_LIMIT || + indent2 > FORMATTING_LIMIT) + return 0; rewrap_message_tail(sb, c, width, indent1, indent2); return end - placeholder + 1; } else @@ -1473,19 +1501,21 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ struct format_commit_context *c) { struct strbuf local_sb = STRBUF_INIT; - int total_consumed = 0, len, padding = c->padding; + size_t total_consumed = 0; + int len, padding = c->padding; + if (padding < 0) { const char *start = strrchr(sb->buf, '\n'); int occupied; if (!start) start = sb->buf; - occupied = utf8_strnwidth(start, -1, 1); + occupied = utf8_strnwidth(start, strlen(start), 1); occupied += c->pretty_ctx->graph_width; padding = (-padding) - occupied; } while (1) { int modifier = *placeholder == 'C'; - int consumed = format_commit_one(&local_sb, placeholder, c); + size_t consumed = format_commit_one(&local_sb, placeholder, c); total_consumed += consumed; if (!modifier) @@ -1497,7 +1527,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ placeholder++; total_consumed++; } - len = utf8_strnwidth(local_sb.buf, -1, 1); + len = utf8_strnwidth(local_sb.buf, local_sb.len, 1); if (c->flush_type == flush_left_and_steal) { const char *ch = sb->buf + sb->len - 1; @@ -1512,7 +1542,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ if (*ch != 'm') break; p = ch - 1; - while (ch - p < 10 && *p != '\033') + while (p > sb->buf && ch - p < 10 && *p != '\033') p--; if (*p != '\033' || ch + 1 - p != display_mode_esc_sequence_len(p)) @@ -1551,7 +1581,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ } strbuf_addbuf(sb, &local_sb); } else { - int sb_len = sb->len, offset = 0; + size_t sb_len = sb->len, offset = 0; if (c->flush_type == flush_left) offset = padding - len; else if (c->flush_type == flush_both) @@ -1574,8 +1604,7 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */ const char *placeholder, void *context) { - int consumed; - size_t orig_len; + size_t consumed, orig_len; enum { NO_MAGIC, ADD_LF_BEFORE_NON_EMPTY, @@ -1596,9 +1625,21 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */ default: break; } - if (magic != NO_MAGIC) + if (magic != NO_MAGIC) { placeholder++; + switch (placeholder[0]) { + case 'w': + /* + * `%+w()` cannot ever expand to a non-empty string, + * and it potentially changes the layout of preceding + * contents. We're thus not able to handle the magic in + * this combination and refuse the pattern. + */ + return 0; + }; + } + orig_len = sb->len; if (((struct format_commit_context *)context)->flush_type != no_flush) consumed = format_and_pad_commit(sb, placeholder, context); diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 204c149..84c61df 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -867,4 +867,80 @@ test_expect_success 'log --pretty=reference is colored appropriately' ' test_cmp expect actual ' +test_expect_success 'log --pretty with space stealing' ' + printf mm0 >expect && + git log -1 --pretty="format:mm%>>|(1)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with invalid padding format' ' + printf "%s%%<(20" "$(git rev-parse HEAD)" >expect && + git log -1 --pretty="format:%H%<(20" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with magical wrapping directives' ' + commit_id=$(git commit-tree HEAD^{tree} -m "describe me") && + git tag describe-me $commit_id && + printf "\n(tag:\ndescribe-me)%%+w(2)" >expect && + git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' ' + printf "%%w(2147483649,1,1)0" >expect && + git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,2147483649,1)0" >expect && + git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,1,2147483649)0" >expect && + git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' ' + printf "%%<(2147483649)0" >expect && + git log -1 --pretty="format:%<(2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with padding and preceding control chars' ' + printf "\20\20 0" >expect && + git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty truncation with control chars' ' + test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars && + printf "\20\20\20\20x.." >expect && + git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + # We only assert that this command does not crash. This needs to be + # executed with the address sanitizer to demonstrate failure. + git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' ' + test-tool genzeros 2147483649 | tr "\000" "1" >expect && + huge_commit=$(git commit-tree -F expect HEAD^{tree}) +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + git log -1 --format="%B%<(1)%x30" $huge_commit >actual && + echo 0 >>expect && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' ' + test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error && + cat >expect <<-EOF && + fatal: number too large to represent as int on this platform: 2147483649 + EOF + test_cmp expect error +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index 9fa7c1d..7d6e0f8 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1686,6 +1686,10 @@ build_option () { sed -ne "s/^$1: //p" } +test_lazy_prereq SIZE_T_IS_64BIT ' + test 8 -eq "$(build_option sizeof-size_t)" +' + test_lazy_prereq LONG_IS_64BIT ' test 8 -le "$(build_option sizeof-long)" ' diff --git a/utf8.c b/utf8.c index 5b39361..d8a16af 100644 --- a/utf8.c +++ b/utf8.c @@ -206,26 +206,34 @@ int utf8_width(const char **start, size_t *remainder_p) * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. */ -int utf8_strnwidth(const char *string, int len, int skip_ansi) +int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { - int width = 0; const char *orig = string; + size_t width = 0; - if (len == -1) - len = strlen(string); while (string && string < orig + len) { - int skip; + int glyph_width; + size_t skip; + while (skip_ansi && (skip = display_mode_esc_sequence_len(string)) != 0) string += skip; - width += utf8_width(&string, NULL); + + glyph_width = utf8_width(&string, NULL); + if (glyph_width > 0) + width += glyph_width; } - return string ? width : len; + + /* + * TODO: fix the interface of this function and `utf8_strwidth()` to + * return `size_t` instead of `int`. + */ + return cast_size_t_to_int(string ? width : len); } int utf8_strwidth(const char *string) { - return utf8_strnwidth(string, -1, 0); + return utf8_strnwidth(string, strlen(string), 0); } int is_utf8(const char *text) @@ -357,51 +365,52 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, const char *subst) { - struct strbuf sb_dst = STRBUF_INIT; - char *src = sb_src->buf; - char *end = src + sb_src->len; - char *dst; - int w = 0, subst_len = 0; + const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len; + struct strbuf dst; + int w = 0; - if (subst) - subst_len = strlen(subst); - strbuf_grow(&sb_dst, sb_src->len + subst_len); - dst = sb_dst.buf; + strbuf_init(&dst, sb_src->len); while (src < end) { - char *old; + const char *old; + int glyph_width; size_t n; while ((n = display_mode_esc_sequence_len(src))) { - memcpy(dst, src, n); + strbuf_add(&dst, src, n); src += n; - dst += n; } if (src >= end) break; old = src; - n = utf8_width((const char**)&src, NULL); - if (!src) /* broken utf-8, do nothing */ + glyph_width = utf8_width((const char**)&src, NULL); + if (!src) /* broken utf-8, do nothing */ goto out; - if (n && w >= pos && w < pos + width) { + + /* + * In case we see a control character we copy it into the + * buffer, but don't add it to the width. + */ + if (glyph_width < 0) + glyph_width = 0; + + if (glyph_width && w >= pos && w < pos + width) { if (subst) { - memcpy(dst, subst, subst_len); - dst += subst_len; + strbuf_addstr(&dst, subst); subst = NULL; } - w += n; - continue; + } else { + strbuf_add(&dst, old, src - old); } - memcpy(dst, old, src - old); - dst += src - old; - w += n; + + w += glyph_width; } - strbuf_setlen(&sb_dst, dst - sb_dst.buf); - strbuf_swap(sb_src, &sb_dst); + + strbuf_swap(sb_src, &dst); out: - strbuf_release(&sb_dst); + strbuf_release(&dst); } /* @@ -791,7 +800,7 @@ int skip_utf8_bom(char **text, size_t len) void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, const char *s) { - int slen = strlen(s); + size_t slen = strlen(s); int display_len = utf8_strnwidth(s, slen, 0); int utf8_compensation = slen - display_len; diff --git a/utf8.h b/utf8.h index fcd5167..6da1b6d 100644 --- a/utf8.h +++ b/utf8.h @@ -7,7 +7,7 @@ typedef unsigned int ucs_char_t; /* assuming 32bit int */ size_t display_mode_esc_sequence_len(const char *s); int utf8_width(const char **start, size_t *remainder_p); -int utf8_strnwidth(const char *string, int len, int skip_ansi); +int utf8_strnwidth(const char *string, size_t len, int skip_ansi); int utf8_strwidth(const char *string); int is_utf8(const char *text); int is_encoding_utf8(const char *name);