diff options
Diffstat (limited to 'src/viewer/ascii.c')
-rw-r--r-- | src/viewer/ascii.c | 1049 |
1 files changed, 1049 insertions, 0 deletions
diff --git a/src/viewer/ascii.c b/src/viewer/ascii.c new file mode 100644 index 0000000..52fa41d --- /dev/null +++ b/src/viewer/ascii.c @@ -0,0 +1,1049 @@ +/* + Internal file viewer for the Midnight Commander + Function for plain view + + Copyright (C) 1994-2022 + Free Software Foundation, Inc. + + Written by: + Miguel de Icaza, 1994, 1995, 1998 + Janne Kukonlehto, 1994, 1995 + Jakub Jelinek, 1995 + Joseph M. Hinkle, 1996 + Norbert Warmuth, 1997 + Pavel Machek, 1998 + Roland Illig <roland.illig@gmx.de>, 2004, 2005 + Slava Zanko <slavazanko@google.com>, 2009 + Andrew Borodin <aborodin@vmail.ru>, 2009-2022 + Ilia Maslakov <il.smind@gmail.com>, 2009 + Rewritten almost from scratch by: + Egmont Koblinger <egmont@gmail.com>, 2014 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + ------------------------------------------------------------------------------------------------ + + The viewer is implemented along the following design principles: + + Goals: Always display simple scripts, double wide (CJK), combining accents and spacing marks + (often used e.g. in Devanagari) perfectly. Make the arrow keys always work correctly. + + Absolutely non-goal: RTL. + + Terminology: + + - A "paragraph" is the text between two adjacent newline characters. A "line" or "row" is a + visual row on the screen. In wrap mode, the viewer formats a paragraph into one or more lines. + + - The Unicode glossary <http://www.unicode.org/glossary/> doesn't seem to have a notion of "base + character followed by zero or more combining characters". The closest matches are "Combining + Character Sequence" meaning a base character followed by one or more combining characters, or + "Grapheme" which seems to exclude non-printable characters such as newline. In this file, + "combining character sequence" (or any obvious abbreviation thereof) means a base character + followed by zero or more (up to a current limit of 4) combining characters. + + ------------------------------------------------------------------------------------------------ + + The parser-formatter is designed to be stateless across paragraphs. This is so that we can walk + backwards without having to reparse the whole file (although we still need to reparse and + reformat the whole paragraph, but it's a lot better). This principle needs to be changed if we + ever get to address tickets 1849/2977, but then we can still store (for efficiency) the parser + state at the beginning of the paragraph, and safely walk backwards if we don't cross an escape + character. + + The parser-formatter, however, definitely needs to carry a state across lines. Currently this + state contains: + + - The logical column (as if we didn't wrap). This is used for handling TAB characters after a + wordwrap consistently with less. + + - Whether the last nroff character was bold or underlined. This is used for displaying the + ambiguous _\b_ sequence consistently with less. + + - Whether the desired way of displaying a lonely combining accent or spacing mark is to place it + over a dotted circle (we do this at the beginning of the paragraph of after a TAB), or to ignore + the combining char and show replacement char for the spacing mark (we do this if e.g. too many + of these were encountered and hence we don't glue them with their base character). + + - (This state needs to be expanded if e.g. we decide to print verbose replacement characters + (e.g. "<U+0080>") and allow these to wrap around lines.) + + The state also contains the file offset, as it doesn't make sense to ever know the state without + knowing the corresponding offset. + + The state depends on various settings (viewer width, encoding, nroff mode, charwrap or wordwrap + mode (if we'll have that one day) etc.), needs to be recomputed if any of these changes. + + Walking forwards is usually relatively easy both in the file and on the screen. Walking + backwards within a paragraph would only be possible in some special cases and even then it would + be painful, so we always walk back to the beginning of the paragraph and reparse-reformat from + there. + + (Walking back within a line in the file would have at least the following difficulties: handling + the parser state; processing invalid UTF-8; processing invalid nroff (e.g. what is "_\bA\bA"?). + Walking back on the display: we wouldn't know where to display the last line of a paragraph, or + where to display a line if its following line starts with a wide (CJK or Tab) character. Long + story short: just forget this approach.) + + Most important variables: + + - dpy_start: Both in unwrap and wrap modes this points to the beginning of the topmost displayed + paragraph. + + - dpy_text_column: Only in unwrap mode, an additional horizontal scroll. + + - dpy_paragraph_skip_lines: Only in wrap mode, an additional vertical scroll (the number of + lines that are scrolled off at the top from the topmost paragraph). + + - dpy_state_top: Only in wrap mode, the offset and parser-formatter state at the line where + displaying the file begins is cached here. + + - dpy_wrap_dirty: If some parameter has changed that makes it necessary to reparse-redisplay the + topmost paragraph. + + In wrap mode, the three variables "dpy_start", "dpy_paragraph_skip_lines" and "dpy_state_top" + are kept consistent. Think of the first two as the ones describing the position, and the third + as a cached value for better performance so that we don't need to wrap the invisible beginning + of the topmost paragraph over and over again. The third value needs to be recomputed each time a + parameter that influences parsing or displaying the file (e.g. width of screen, encoding, nroff + mode) changes, this is signaled by "dpy_wrap_dirty" to force recomputing "dpy_state_top" (and + clamp "dpy_paragraph_skip_lines" if necessary). + + ------------------------------------------------------------------------------------------------ + + Help integration + + I'm planning to port the help viewer to this codebase. + + Splitting at sections would still happen in the help viewer. It would either copy a section, or + set force_max and a similar force_min to limit displaying to one section only. + + Parsing the help format would go next to the nroff parser. The colors, alternate character set, + and emitting the version number would go to the "state". (The version number would be + implemented by emitting remaining characters of a buffer in the "state" one by one, without + advancing in the file position.) + + The active link would be drawn similarly to the search highlight. Other than that, the viewer + wouldn't care about links (except for their color). help.c would keep track of which one is + highlighted, how to advance to the next/prev on an arrow, how the scroll offset needs to be + adjusted when moving, etc. + + Add wrapping at word boundaries to where wrapping at char boundaries happens now. + */ + +#include <config.h> + +#include "lib/global.h" +#include "lib/tty/tty.h" +#include "lib/skin.h" +#include "lib/util.h" /* is_printable() */ +#ifdef HAVE_CHARSET +#include "lib/charsets.h" +#endif + +#include "src/setup.h" /* option_tab_spacing */ + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/* The Unicode standard recommends that lonely combining characters are printed over a dotted + * circle. If the terminal is not UTF-8, this will be replaced by a dot anyway. */ +#define BASE_CHARACTER_FOR_LONELY_COMBINING 0x25CC /* dotted circle */ +#define MAX_COMBINING_CHARS 4 /* both slang and ncurses support exactly 4 */ + +/* I think anything other than space (e.g. arrows) just introduce visual clutter without actually + * adding value. */ +#define PARTIAL_CJK_AT_LEFT_MARGIN ' ' +#define PARTIAL_CJK_AT_RIGHT_MARGIN ' ' + +/* + * Wrap mode: This is for safety so that jumping to the end of file (which already includes + * scrolling back by a page) and then walking backwards is reasonably fast, even if the file is + * extremely large and consists of maybe full zeros or something like that. If there's no newline + * found within this limit, just start displaying from there and see what happens. We might get + * some displaying parameteres (most importantly the columns) incorrect, but at least will show the + * file without spinning the CPU for ages. When scrolling back to that point, the user might see a + * garbled first line (even starting with an invalid partial UTF-8), but then walking back by yet + * another line should fix it. + * + * Unwrap mode: This is not used, we wouldn't be able to do anything reasonable without walking + * back a whole paragraph (well, view->data_area.height paragraphs actually). + */ +#define MAX_BACKWARDS_WALK_IN_PARAGRAPH (100 * 1000) + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* TODO: These methods shouldn't be necessary, see ticket 3257 */ + +static int +mcview_wcwidth (const WView * view, int c) +{ +#ifdef HAVE_CHARSET + if (view->utf8) + { + if (g_unichar_iswide (c)) + return 2; + if (g_unichar_iszerowidth (c)) + return 0; + } +#else + (void) view; + (void) c; +#endif /* HAVE_CHARSET */ + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mcview_ismark (const WView * view, int c) +{ +#ifdef HAVE_CHARSET + if (view->utf8) + return g_unichar_ismark (c); +#else + (void) view; + (void) c; +#endif /* HAVE_CHARSET */ + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +/* actually is_non_spacing_mark_or_enclosing_mark */ +static gboolean +mcview_is_non_spacing_mark (const WView * view, int c) +{ +#ifdef HAVE_CHARSET + if (view->utf8) + { + GUnicodeType type; + + type = g_unichar_type (c); + + return type == G_UNICODE_NON_SPACING_MARK || type == G_UNICODE_ENCLOSING_MARK; + } +#else + (void) view; + (void) c; +#endif /* HAVE_CHARSET */ + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +#if 0 +static gboolean +mcview_is_spacing_mark (const WView * view, int c) +{ +#ifdef HAVE_CHARSET + if (view->utf8) + return g_unichar_type (c) == G_UNICODE_SPACING_MARK; +#else + (void) view; + (void) c; +#endif /* HAVE_CHARSET */ + return FALSE; +} +#endif /* 0 */ + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mcview_isprint (const WView * view, int c) +{ +#ifdef HAVE_CHARSET + if (!view->utf8) + c = convert_from_8bit_to_utf_c ((unsigned char) c, view->converter); + return g_unichar_isprint (c); +#else + (void) view; + /* TODO this is very-very buggy by design: ticket 3257 comments 0-1 */ + return is_printable (c); +#endif /* HAVE_CHARSET */ +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +mcview_char_display (const WView * view, int c, char *s) +{ +#ifdef HAVE_CHARSET + if (mc_global.utf8_display) + { + if (!view->utf8) + c = convert_from_8bit_to_utf_c ((unsigned char) c, view->converter); + if (!g_unichar_isprint (c)) + c = '.'; + return g_unichar_to_utf8 (c, s); + } + if (view->utf8) + { + if (g_unichar_iswide (c)) + { + s[0] = s[1] = '.'; + return 2; + } + if (g_unichar_iszerowidth (c)) + return 0; + /* TODO the is_printable check below will be broken for this */ + c = convert_from_utf_to_current_c (c, view->converter); + } + else + { + /* TODO the is_printable check below will be broken for this */ + c = convert_to_display_c (c); + } +#else + (void) view; +#endif /* HAVE_CHARSET */ + /* TODO this is very-very buggy by design: ticket 3257 comments 0-1 */ + if (!is_printable (c)) + c = '.'; + *s = c; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Just for convenience, a common interface in front of mcview_get_utf and mcview_get_byte, so that + * the caller doesn't have to care about utf8 vs 8-bit modes. + * + * Normally: stores c, updates state, returns TRUE. + * At EOF: state is unchanged, c is undefined, returns FALSE. + * + * Just as with mcview_get_utf(), invalid UTF-8 is reported using negative integers. + * + * Also, temporary hack: handle force_max here. + * TODO: move it to lower layers (datasource.c)? + */ +static gboolean +mcview_get_next_char (WView * view, mcview_state_machine_t * state, int *c) +{ + /* Pretend EOF if we reached force_max */ + if (view->force_max >= 0 && state->offset >= view->force_max) + return FALSE; + +#ifdef HAVE_CHARSET + if (view->utf8) + { + int char_length = 0; + + if (!mcview_get_utf (view, state->offset, c, &char_length)) + return FALSE; + /* Pretend EOF if we crossed force_max */ + if (view->force_max >= 0 && state->offset + char_length > view->force_max) + return FALSE; + + state->offset += char_length; + return TRUE; + } +#endif /* HAVE_CHARSET */ + if (!mcview_get_byte (view, state->offset, c)) + return FALSE; + state->offset++; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * This function parses the next nroff character and gives it to you along with its desired color, + * so you never have to care about nroff again. + * + * The nroff mode does the backspace trick for every single character (Unicode codepoint). At least + * that's what the GNU groff 1.22 package produces, and that's what less 458 expects. For + * double-wide characters (CJK), still only a single backspace is emitted. For combining accents + * and such, the print-backspace-print step is repeated for the base character and then for each + * accent separately. + * + * So, the right place for this layer is after the bytes are interpreted in UTF-8, but before + * joining a base character with its combining accents. + * + * Normally: stores c and color, updates state, returns TRUE. + * At EOF: state is unchanged, c and color are undefined, returns FALSE. + * + * color can be null if the caller doesn't care. + */ +static gboolean +mcview_get_next_maybe_nroff_char (WView * view, mcview_state_machine_t * state, int *c, int *color) +{ + mcview_state_machine_t state_after_nroff; + int c2, c3; + + if (color != NULL) + *color = VIEW_NORMAL_COLOR; + + if (!view->mode_flags.nroff) + return mcview_get_next_char (view, state, c); + + if (!mcview_get_next_char (view, state, c)) + return FALSE; + /* Don't allow nroff formatting around CR, LF, TAB or other special chars */ + if (!mcview_isprint (view, *c)) + return TRUE; + + state_after_nroff = *state; + + if (!mcview_get_next_char (view, &state_after_nroff, &c2)) + return TRUE; + if (c2 != '\b') + return TRUE; + + if (!mcview_get_next_char (view, &state_after_nroff, &c3)) + return TRUE; + if (!mcview_isprint (view, c3)) + return TRUE; + + if (*c == '_' && c3 == '_') + { + *state = state_after_nroff; + if (color != NULL) + *color = + state->nroff_underscore_is_underlined ? VIEW_UNDERLINED_COLOR : VIEW_BOLD_COLOR; + } + else if (*c == c3) + { + *state = state_after_nroff; + state->nroff_underscore_is_underlined = FALSE; + if (color != NULL) + *color = VIEW_BOLD_COLOR; + } + else if (*c == '_') + { + *c = c3; + *state = state_after_nroff; + state->nroff_underscore_is_underlined = TRUE; + if (color != NULL) + *color = VIEW_UNDERLINED_COLOR; + } + + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Get one base character, along with its combining or spacing mark characters. + * + * (A spacing mark is a character that extends the base character's width 1 into a combined + * character of width 2, yet these two character cells should not be separated. E.g. Devanagari + * <U+0939><U+094B>.) + * + * This method exists mainly for two reasons. One is to be able to tell if we fit on the current + * line or need to wrap to the next one. The other is that both slang and ncurses seem to require + * that the character and its combining marks are printed in a single call (or is it just a + * limitation of mc's wrapper to them?). + * + * For convenience, this method takes care of converting CR or CR+LF into LF. + * TODO this should probably happen later, when displaying the file? + * + * Normally: stores cs and color, updates state, returns >= 1 (entries in cs). + * At EOF: state is unchanged, cs and color are undefined, returns 0. + * + * @param view ... + * @param state the parser-formatter state machine's state, updated + * @param cs store the characters here + * @param clen the room available in cs (that is, at most clen-1 combining marks are allowed), must + * be at least 2 + * @param color if non-NULL, store the color here, taken from the first codepoint's color + * @return the number of entries placed in cs, or 0 on EOF + */ +static int +mcview_next_combining_char_sequence (WView * view, mcview_state_machine_t * state, int *cs, + int clen, int *color) +{ + int i = 1; + + if (!mcview_get_next_maybe_nroff_char (view, state, cs, color)) + return 0; + + /* Process \r and \r\n newlines. */ + if (cs[0] == '\r') + { + int cnext; + + mcview_state_machine_t state_after_crlf = *state; + if (mcview_get_next_maybe_nroff_char (view, &state_after_crlf, &cnext, NULL) + && cnext == '\n') + *state = state_after_crlf; + cs[0] = '\n'; + return 1; + } + + /* We don't want combining over non-printable characters. This includes '\n' and '\t' too. */ + if (!mcview_isprint (view, cs[0])) + return 1; + + if (mcview_ismark (view, cs[0])) + { + if (!state->print_lonely_combining) + { + /* First character is combining. Either just return it, ... */ + return 1; + } + else + { + /* or place this (and subsequent combining ones) over a dotted circle. */ + cs[1] = cs[0]; + cs[0] = BASE_CHARACTER_FOR_LONELY_COMBINING; + i = 2; + } + } + + if (mcview_wcwidth (view, cs[0]) == 2) + { + /* Don't allow combining or spacing mark for wide characters, is this okay? */ + return 1; + } + + /* Look for more combining chars. Either at most clen-1 zero-width combining chars, + * or at most 1 spacing mark. Is this logic correct? */ + for (; i < clen; i++) + { + mcview_state_machine_t state_after_combining; + + state_after_combining = *state; + if (!mcview_get_next_maybe_nroff_char (view, &state_after_combining, &cs[i], NULL)) + return i; + if (!mcview_ismark (view, cs[i]) || !mcview_isprint (view, cs[i])) + return i; + if (g_unichar_type (cs[i]) == G_UNICODE_SPACING_MARK) + { + /* Only allow as the first combining char. Stop processing in either case. */ + if (i == 1) + { + *state = state_after_combining; + i++; + } + return i; + } + *state = state_after_combining; + } + return i; +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Parse, format and possibly display one visual line of text. + * + * Formatting starts at the given "state" (which encodes the file offset and parser and formatter's + * internal state). In unwrap mode, this should point to the beginning of the paragraph with the + * default state, the additional horizontal scrolling is added here. In wrap mode, this should + * point to the beginning of the line, with the proper state at that point. + * + * In wrap mode, if a line ends in a newline, it is consumed, even if it's exactly at the right + * edge. In unwrap mode, the whole remaining line, including the newline is consumed. Displaying + * the next line should start at "state"'s new value, or if we displayed the bottom line then + * state->offset tells the file offset to be shown in the top bar. + * + * If "row" is offscreen, don't actually display the line but still update "state" and return the + * proper value. This is used by mcview_wrap_move_down to advance in the file. + * + * @param view ... + * @param state the parser-formatter state machine's state, updated + * @param row print to this row + * @param paragraph_ended store TRUE if paragraph ended by newline or EOF, FALSE if wraps to next + * line + * @param linewidth store the width of the line here + * @return the number of rows, that is, 0 if we were already at EOF, otherwise 1 + */ +static int +mcview_display_line (WView * view, mcview_state_machine_t * state, int row, + gboolean * paragraph_ended, off_t * linewidth) +{ + const WRect *r = &view->data_area; + off_t dpy_text_column = view->mode_flags.wrap ? 0 : view->dpy_text_column; + int col = 0; + int cs[1 + MAX_COMBINING_CHARS]; + char str[(1 + MAX_COMBINING_CHARS) * UTF8_CHAR_LEN + 1]; + int i, j; + + if (paragraph_ended != NULL) + *paragraph_ended = TRUE; + + if (!view->mode_flags.wrap && (row < 0 || row >= r->lines) && linewidth == NULL) + { + /* Optimization: Fast forward to the end of the line, rather than carefully + * parsing and then not actually displaying it. */ + off_t eol; + int retval; + + eol = mcview_eol (view, state->offset); + retval = (eol > state->offset) ? 1 : 0; + + mcview_state_machine_init (state, eol); + return retval; + } + + while (TRUE) + { + int charwidth = 0; + mcview_state_machine_t state_saved; + int n; + int color; + + state_saved = *state; + n = mcview_next_combining_char_sequence (view, state, cs, 1 + MAX_COMBINING_CHARS, &color); + if (n == 0) + { + if (linewidth != NULL) + *linewidth = col; + return (col > 0) ? 1 : 0; + } + + if (view->search_start <= state->offset && state->offset < view->search_end) + color = VIEW_SELECTED_COLOR; + + if (cs[0] == '\n') + { + /* New line: reset all formatting state for the next paragraph. */ + mcview_state_machine_init (state, state->offset); + if (linewidth != NULL) + *linewidth = col; + return 1; + } + + if (mcview_is_non_spacing_mark (view, cs[0])) + { + /* Lonely combining character. Probably leftover after too many combining chars. Just ignore. */ + continue; + } + + /* Nonprintable, or lonely spacing mark */ + if ((!mcview_isprint (view, cs[0]) || mcview_ismark (view, cs[0])) && cs[0] != '\t') + cs[0] = '.'; + + for (i = 0; i < n; i++) + charwidth += mcview_wcwidth (view, cs[i]); + + /* Adjust the width for TAB. It's handled below along with the normal characters, + * so that it's wrapped consistently with them, and is painted with the proper + * attributes (although currently it can't have a special color). */ + if (cs[0] == '\t') + { + charwidth = option_tab_spacing - state->unwrapped_column % option_tab_spacing; + state->print_lonely_combining = TRUE; + } + else + state->print_lonely_combining = FALSE; + + /* In wrap mode only: We're done with this row if the character sequence wouldn't fit. + * Except if at the first column, because then it wouldn't fit in the next row either. + * In this extreme case let the unwrapped code below do its best to display it. */ + if (view->mode_flags.wrap && (off_t) col + charwidth > dpy_text_column + (off_t) r->cols + && col > 0) + { + *state = state_saved; + if (paragraph_ended != NULL) + *paragraph_ended = FALSE; + if (linewidth != NULL) + *linewidth = col; + return 1; + } + + /* Display, unless outside of the viewport. */ + if (row >= 0 && row < r->lines) + { + if ((off_t) col >= dpy_text_column && + (off_t) col + charwidth <= dpy_text_column + (off_t) r->cols) + { + /* The combining character sequence fits entirely in the viewport. Print it. */ + tty_setcolor (color); + widget_gotoyx (view, r->y + row, r->x + ((off_t) col - dpy_text_column)); + if (cs[0] == '\t') + { + for (i = 0; i < charwidth; i++) + tty_print_char (' '); + } + else + { + j = 0; + for (i = 0; i < n; i++) + j += mcview_char_display (view, cs[i], str + j); + str[j] = '\0'; + /* This is probably a bug in our tty layer, but tty_print_string + * normalizes the string, whereas tty_printf doesn't. Don't normalize, + * since we handle combining characters ourselves correctly, it's + * better if they are copy-pasted correctly. Ticket 3255. */ + tty_printf ("%s", str); + } + } + else if ((off_t) col < dpy_text_column && (off_t) col + charwidth > dpy_text_column) + { + /* The combining character sequence would cross the left edge of the viewport. + * This cannot happen with wrap mode. Print replacement character(s), + * or spaces with the correct attributes for partial Tabs. */ + tty_setcolor (color); + for (i = dpy_text_column; + i < (off_t) col + charwidth && i < dpy_text_column + (off_t) r->cols; i++) + { + widget_gotoyx (view, r->y + row, r->x + (i - dpy_text_column)); + tty_print_anychar ((cs[0] == '\t') ? ' ' : PARTIAL_CJK_AT_LEFT_MARGIN); + } + } + else if ((off_t) col < dpy_text_column + (off_t) r->cols && + (off_t) col + charwidth > dpy_text_column + (off_t) r->cols) + { + /* The combining character sequence would cross the right edge of the viewport + * and we're not wrapping. Print replacement character(s), + * or spaces with the correct attributes for partial Tabs. */ + tty_setcolor (color); + for (i = col; i < dpy_text_column + (off_t) r->cols; i++) + { + widget_gotoyx (view, r->y + row, r->x + (i - dpy_text_column)); + tty_print_anychar ((cs[0] == '\t') ? ' ' : PARTIAL_CJK_AT_RIGHT_MARGIN); + } + } + } + + col += charwidth; + state->unwrapped_column += charwidth; + + if (!view->mode_flags.wrap && (off_t) col >= dpy_text_column + (off_t) r->cols + && linewidth == NULL) + { + /* Optimization: Fast forward to the end of the line, rather than carefully + * parsing and then not actually displaying it. */ + off_t eol; + + eol = mcview_eol (view, state->offset); + mcview_state_machine_init (state, eol); + return 1; + } + } +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Parse, format and possibly display one paragraph (perhaps not from the beginning). + * + * Formatting starts at the given "state" (which encodes the file offset and parser and formatter's + * internal state). In unwrap mode, this should point to the beginning of the paragraph with the + * default state, the additional horizontal scrolling is added here. In wrap mode, this may point + * to the beginning of the line within a paragraph (to display the partial paragraph at the top), + * with the proper state at that point. + * + * Displaying the next paragraph should start at "state"'s new value, or if we displayed the bottom + * line then state->offset tells the file offset to be shown in the top bar. + * + * If "row" is negative, don't display the first abs(row) lines and display the rest from the top. + * This was a nice idea but it's now unused :) + * + * If "row" is too large, don't display the paragraph at all but still return the number of lines. + * This is used when moving upwards. + * + * @param view ... + * @param state the parser-formatter state machine's state, updated + * @param row print starting at this row + * @return the number of rows the paragraphs is wrapped to, that is, 0 if we were already at EOF, + * otherwise 1 in unwrap mode, >= 1 in wrap mode. We stop when reaching the bottom of the + * viewport, it's not counted how many more lines the paragraph would occupy + */ +static int +mcview_display_paragraph (WView * view, mcview_state_machine_t * state, int row) +{ + int lines = 0; + + while (TRUE) + { + gboolean paragraph_ended; + + lines += mcview_display_line (view, state, row, ¶graph_ended, NULL); + if (paragraph_ended) + return lines; + + if (row < view->data_area.lines) + { + row++; + /* stop if bottom of screen reached */ + if (row >= view->data_area.lines) + return lines; + } + } +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Recompute dpy_state_top from dpy_start and dpy_paragraph_skip_lines. Clamp + * dpy_paragraph_skip_lines if necessary. + * + * This method should be called in wrap mode after changing one of the parsing or formatting + * properties (e.g. window width, encoding, nroff), or when switching to wrap mode from unwrap or + * hex. + * + * If we stayed within the same paragraph then try to keep the vertical offset within that + * paragraph as well. It might happen though that the paragraph became shorter than our desired + * vertical position, in that case move to its last row. + */ +static void +mcview_wrap_fixup (WView * view) +{ + int lines = view->dpy_paragraph_skip_lines; + + if (!view->dpy_wrap_dirty) + return; + view->dpy_wrap_dirty = FALSE; + + view->dpy_paragraph_skip_lines = 0; + mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); + + while (lines-- != 0) + { + mcview_state_machine_t state_prev; + gboolean paragraph_ended; + + state_prev = view->dpy_state_top; + if (mcview_display_line (view, &view->dpy_state_top, -1, ¶graph_ended, NULL) == 0) + break; + if (paragraph_ended) + { + view->dpy_state_top = state_prev; + break; + } + view->dpy_paragraph_skip_lines++; + } +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/** + * In both wrap and unwrap modes, dpy_start points to the beginning of the paragraph. + * + * In unwrap mode, start displaying from this position, probably applying an additional horizontal + * scroll. + * + * In wrap mode, an additional dpy_paragraph_skip_lines lines are skipped from the top of this + * paragraph. dpy_state_top contains the position and parser-formatter state corresponding to the + * top left corner so we can just start rendering from here. Unless dpy_wrap_dirty is set in which + * case dpy_state_top is invalid and we need to recompute first. + */ +void +mcview_display_text (WView * view) +{ + const WRect *r = &view->data_area; + int row; + mcview_state_machine_t state; + gboolean again; + + do + { + int n; + + again = FALSE; + + mcview_display_clean (view); + mcview_display_ruler (view); + + if (!view->mode_flags.wrap) + mcview_state_machine_init (&state, view->dpy_start); + else + { + mcview_wrap_fixup (view); + state = view->dpy_state_top; + } + + for (row = 0; row < r->lines; row += n) + { + n = mcview_display_paragraph (view, &state, row); + if (n == 0) + { + /* In the rare case that displaying didn't start at the beginning + * of the file, yet there are some empty lines at the bottom, + * scroll the file and display again. This happens when e.g. the + * window is made bigger, or the file becomes shorter due to + * charset change or enabling nroff. */ + if ((view->mode_flags.wrap ? view->dpy_state_top.offset : view->dpy_start) > 0) + { + mcview_ascii_move_up (view, r->lines - row); + again = TRUE; + } + break; + } + } + } + while (again); + + view->dpy_end = state.offset; + view->dpy_state_bottom = state; + + tty_setcolor (VIEW_NORMAL_COLOR); + if (mcview_show_eof != NULL && mcview_show_eof[0] != '\0') + while (row < r->lines) + { + widget_gotoyx (view, r->y + row, r->x); + /* TODO: should make it no wider than the viewport */ + tty_print_string (mcview_show_eof); + row++; + } +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Move down. + * + * It's very simple. Just invisibly format the next "lines" lines, carefully carrying the formatter + * state in wrap mode. But before each step we need to check if we've already hit the end of the + * file, in that case we can no longer move. This is done by walking from dpy_state_bottom. + * + * Note that this relies on mcview_display_text() setting dpy_state_bottom to its correct value + * upon rendering the screen contents. So don't call this function from other functions (e.g. at + * the bottom of mcview_ascii_move_up()) which invalidate this value. + */ +void +mcview_ascii_move_down (WView * view, off_t lines) +{ + while (lines-- != 0) + { + gboolean paragraph_ended; + + /* See if there's still data below the bottom line, by imaginarily displaying one + * more line. This takes care of reading more data into growbuf, if required. + * If the end position didn't advance, we're at EOF and hence bail out. */ + if (mcview_display_line (view, &view->dpy_state_bottom, -1, ¶graph_ended, NULL) == 0) + break; + + /* Okay, there's enough data. Move by 1 row at the top, too. No need to check for + * EOF, that can't happen. */ + if (!view->mode_flags.wrap) + { + view->dpy_start = mcview_eol (view, view->dpy_start); + view->dpy_paragraph_skip_lines = 0; + view->dpy_wrap_dirty = TRUE; + } + else + { + mcview_display_line (view, &view->dpy_state_top, -1, ¶graph_ended, NULL); + if (!paragraph_ended) + view->dpy_paragraph_skip_lines++; + else + { + view->dpy_start = view->dpy_state_top.offset; + view->dpy_paragraph_skip_lines = 0; + } + } + } +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Move up. + * + * Unwrap mode: Piece of cake. Wrap mode: If we'd walk back more than the current line offset + * within the paragraph, we need to jump back to the previous paragraph and compute its height to + * see if we start from that paragraph, and repeat this if necessary. Once we're within the desired + * paragraph, we still need to format it from its beginning to know the state. + * + * See the top of this file for comments about MAX_BACKWARDS_WALK_IN_PARAGRAPH. + * + * force_max is a nice protection against the rare extreme case that the file underneath us + * changes, we don't want to endlessly consume a file of maybe full of zeros upon moving upwards. + */ +void +mcview_ascii_move_up (WView * view, off_t lines) +{ + if (!view->mode_flags.wrap) + { + while (lines-- != 0) + view->dpy_start = mcview_bol (view, view->dpy_start - 1, 0); + view->dpy_paragraph_skip_lines = 0; + view->dpy_wrap_dirty = TRUE; + } + else + { + int i; + + while (lines > view->dpy_paragraph_skip_lines) + { + /* We need to go back to the previous paragraph. */ + if (view->dpy_start == 0) + { + /* Oops, we're already in the first paragraph. */ + view->dpy_paragraph_skip_lines = 0; + mcview_state_machine_init (&view->dpy_state_top, 0); + return; + } + lines -= view->dpy_paragraph_skip_lines; + view->force_max = view->dpy_start; + view->dpy_start = + mcview_bol (view, view->dpy_start - 1, + view->dpy_start - MAX_BACKWARDS_WALK_IN_PARAGRAPH); + mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); + /* This is a tricky way of denoting that we're at the end of the paragraph. + * Normally we'd jump to the next paragraph and reset paragraph_skip_lines. But for + * walking backwards this is exactly what we need. */ + view->dpy_paragraph_skip_lines = + mcview_display_paragraph (view, &view->dpy_state_top, view->data_area.lines); + view->force_max = -1; + } + + /* Okay, we have have dpy_start pointing to the desired paragraph, and we still need to + * walk back "lines" lines from the current "dpy_paragraph_skip_lines" offset. We can't do + * that, so walk from the beginning of the paragraph. */ + mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); + view->dpy_paragraph_skip_lines -= lines; + for (i = 0; i < view->dpy_paragraph_skip_lines; i++) + mcview_display_line (view, &view->dpy_state_top, -1, NULL, NULL); + } +} + +/* --------------------------------------------------------------------------------------------- */ + +void +mcview_ascii_moveto_bol (WView * view) +{ + if (!view->mode_flags.wrap) + view->dpy_text_column = 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +mcview_ascii_moveto_eol (WView * view) +{ + if (!view->mode_flags.wrap) + { + mcview_state_machine_t state; + off_t linewidth; + + /* Get the width of the topmost paragraph. */ + mcview_state_machine_init (&state, view->dpy_start); + mcview_display_line (view, &state, -1, NULL, &linewidth); + view->dpy_text_column = DOZ (linewidth, (off_t) view->data_area.cols); + } +} + +/* --------------------------------------------------------------------------------------------- */ + +void +mcview_state_machine_init (mcview_state_machine_t * state, off_t offset) +{ + memset (state, 0, sizeof (*state)); + state->offset = offset; + state->print_lonely_combining = TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ |