diff options
Diffstat (limited to '')
-rw-r--r-- | wp-includes/kses.php | 78 |
1 files changed, 71 insertions, 7 deletions
diff --git a/wp-includes/kses.php b/wp-includes/kses.php index cccb176..0ef5803 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -963,6 +963,7 @@ function wp_kses_version() { * It also matches stray `>` characters. * * @since 1.0.0 + * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. * * @global array[]|string $pass_allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. @@ -981,7 +982,21 @@ function wp_kses_split( $content, $allowed_html, $allowed_protocols ) { $pass_allowed_html = $allowed_html; $pass_allowed_protocols = $allowed_protocols; - return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $content ); + $token_pattern = <<<REGEX +~ + ( # Detect comments of various flavors before attempting to find tags. + (<!--.*?(-->|$)) # - Normative HTML comments. + | + </[^a-zA-Z][^>]*> # - Closing tags with invalid tag names. + | + <![^>]*> # - Invalid markup declaration nodes. Not all invalid nodes + # are matched so as to avoid breaking legacy behaviors. + ) + | + (<[^>]*(>|$)|>) # Tag-like spans of text. +~x +REGEX; + return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content ); } /** @@ -1069,23 +1084,69 @@ function _wp_kses_split_callback( $matches ) { * @access private * @ignore * @since 1.0.0 + * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. * * @param string $content Content to filter. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Array of allowed URL protocols. + * * @return string Fixed HTML element */ function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) { $content = wp_kses_stripslashes( $content ); - // It matched a ">" character. + /* + * The regex pattern used to split HTML into chunks attempts + * to split on HTML token boundaries. This function should + * thus receive chunks that _either_ start with meaningful + * syntax tokens, like a tag `<div>` or a comment `<!-- ... -->`. + * + * If the first character of the `$content` chunk _isn't_ one + * of these syntax elements, which always starts with `<`, then + * the match had to be for the final alternation of `>`. In such + * case, it's probably standing on its own and could be encoded + * with a character reference to remove ambiguity. + * + * In other words, if this chunk isn't from a match of a syntax + * token, it's just a plaintext greater-than (`>`) sign. + */ if ( ! str_starts_with( $content, '<' ) ) { return '>'; } - // Allow HTML comments. + /* + * When certain invalid syntax constructs appear, the HTML parser + * shifts into what's called the "bogus comment state." This is a + * plaintext state that consumes everything until the nearest `>` + * and then transforms the entire span into an HTML comment. + * + * Preserve these comments and do not treat them like tags. + * + * @see https://html.spec.whatwg.org/#bogus-comment-state + */ + if ( 1 === preg_match( '~^(?:</[^a-zA-Z][^>]*>|<![a-z][^>]*>)$~', $content ) ) { + /** + * Since the pattern matches `</…>` and also `<!…>`, this will + * preserve the type of the cleaned-up token in the output. + */ + $opener = $content[1]; + $content = substr( $content, 2, -1 ); + + do { + $prev = $content; + $content = wp_kses( $content, $allowed_html, $allowed_protocols ); + } while ( $prev !== $content ); + + // Recombine the modified inner content with the original token structure. + return "<{$opener}{$content}>"; + } + + /* + * Normative HTML comments should be handled separately as their + * parsing rules differ from those for tags and text nodes. + */ if ( str_starts_with( $content, '<!--' ) ) { $content = str_replace( array( '<!--', '-->' ), '', $content ); @@ -1263,11 +1324,10 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe * `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see * https://www.w3.org/TR/html40/struct/objects.html#adef-data). * - * Note: the attribute name should only contain `A-Za-z0-9_-` chars, - * double hyphens `--` are not accepted by WordPress. + * Note: the attribute name should only contain `A-Za-z0-9_-` chars. */ if ( str_starts_with( $name_low, 'data-' ) && ! empty( $allowed_attr['data-*'] ) - && preg_match( '/^data(?:-[a-z0-9_]+)+$/', $name_low, $match ) + && preg_match( '/^data-[a-z0-9_-]+$/', $name_low, $match ) ) { /* * Add the whole attribute name to the allowed attributes and set any restrictions @@ -2147,7 +2207,7 @@ function wp_filter_global_styles_post( $data ) { ) { unset( $decoded_data['isGlobalStylesUserThemeJSON'] ); - $data_to_encode = WP_Theme_JSON::remove_insecure_properties( $decoded_data ); + $data_to_encode = WP_Theme_JSON::remove_insecure_properties( $decoded_data, 'custom' ); $data_to_encode['isGlobalStylesUserThemeJSON'] = true; return wp_slash( wp_json_encode( $data_to_encode ) ); @@ -2304,6 +2364,7 @@ function kses_init() { * Added support for `box-shadow`. * @since 6.4.0 Added support for `writing-mode`. * @since 6.5.0 Added support for `background-repeat`. + * @since 6.6.0 Added support for `grid-column`, `grid-row`, and `container-type`. * * @param string $css A string of CSS rules. * @param string $deprecated Not used. @@ -2441,11 +2502,13 @@ function safecss_filter_attr( $css, $deprecated = '' ) { 'grid-auto-columns', 'grid-column-start', 'grid-column-end', + 'grid-column', 'grid-column-gap', 'grid-template-rows', 'grid-auto-rows', 'grid-row-start', 'grid-row-end', + 'grid-row', 'grid-row-gap', 'grid-gap', @@ -2475,6 +2538,7 @@ function safecss_filter_attr( $css, $deprecated = '' ) { 'z-index', 'box-shadow', 'aspect-ratio', + 'container-type', // Custom CSS properties. '--*', |