summary | refs | log | tree | commit | diff | stats
path: root/wp-includes/kses.php
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- wp-includes/kses.php 78
1 files changed, 71 insertions, 7 deletions
diff --git a/wp-includes/kses.php b/wp-includes/kses.php
index cccb176..0ef5803 100644
--- a/wp-includes/kses.php
+++ b/wp-includes/kses.php
@@ -963,6 +963,7 @@ function wp_kses_version() {
* It also matches stray `>` characters.
*
* @since 1.0.0
+ * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments.
*
* @global array[]|string $pass_allowed_html An array of allowed HTML elements and attributes,
* or a context name such as 'post'.
@@ -981,7 +982,21 @@ function wp_kses_split( $content, $allowed_html, $allowed_protocols ) {
$pass_allowed_html = $allowed_html;
$pass_allowed_protocols = $allowed_protocols;
- return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $content );
+ $token_pattern = <<<REGEX
+~
+ ( # Detect comments of various flavors before attempting to find tags.
+ (<!--.*?(-->|$)) # - Normative HTML comments.
+ |
+ </[^a-zA-Z][^>]*> # - Closing tags with invalid tag names.
+ |
+ <![^>]*> # - Invalid markup declaration nodes. Not all invalid nodes
+ # are matched so as to avoid breaking legacy behaviors.
+ )
+ |
+ (<[^>]*(>|$)|>) # Tag-like spans of text.
+~x
+REGEX;
+ return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content );
}
/**
@@ -1069,23 +1084,69 @@ function _wp_kses_split_callback( $matches ) {
* @access private
* @ignore
* @since 1.0.0
+ * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments.
*
* @param string $content Content to filter.
* @param array[]|string $allowed_html An array of allowed HTML elements and attributes,
* or a context name such as 'post'. See wp_kses_allowed_html()
* for the list of accepted context names.
* @param string[] $allowed_protocols Array of allowed URL protocols.
+ *
* @return string Fixed HTML element
*/
function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) {
$content = wp_kses_stripslashes( $content );
- // It matched a ">" character.
+ /*
+ * The regex pattern used to split HTML into chunks attempts
+ * to split on HTML token boundaries. This function should thus
+ * receive chunks that _either_ start with a meaningful syntax token,
+ * like a tag `<div>` or a comment `<!-- ... -->`, _or_ are a lone `>`.
+ *
+ * If the `$content` chunk _doesn't_ begin with one of these
+ * syntax elements, which always start with `<`, then the
+ * match had to be for the final alternation of `>`. In such a
+ * case, it's probably standing on its own and could be encoded
+ * with a character reference to remove ambiguity.
+ *
+ * In other words, if this chunk isn't from a match of a syntax
+ * token, it's just a plaintext greater-than (`>`) sign.
+ */
if ( ! str_starts_with( $content, '<' ) ) {
return '&gt;';
}
- // Allow HTML comments.
+ /*
+ * When certain invalid syntax constructs appear, the HTML parser
+ * shifts into what's called the "bogus comment state." This is a
+ * plaintext state that consumes everything until the nearest `>`
+ * and then transforms the entire span into an HTML comment.
+ *
+ * Preserve these comments and do not treat them like tags.
+ *
+ * @see https://html.spec.whatwg.org/#bogus-comment-state
+ */
+ if ( 1 === preg_match( '~^(?:</[^a-zA-Z][^>]*>|<![a-z][^>]*>)$~', $content ) ) {
+ /*
+ * Since the pattern matches both `</…>` and `<!…>`, keeping the second
+ * character (`/` or `!`) preserves the token type in the output.
+ */
+ $opener = $content[1];
+ $content = substr( $content, 2, -1 );
+
+ do {
+ $prev = $content;
+ $content = wp_kses( $content, $allowed_html, $allowed_protocols );
+ } while ( $prev !== $content );
+
+ // Recombine the modified inner content with the original token structure.
+ return "<{$opener}{$content}>";
+ }
+
+ /*
+ * Normative HTML comments should be handled separately as their
+ * parsing rules differ from those for tags and text nodes.
+ */
if ( str_starts_with( $content, '<!--' ) ) {
$content = str_replace( array( '<!--', '-->' ), '', $content );
@@ -1263,11 +1324,10 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
* `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see
* https://www.w3.org/TR/html40/struct/objects.html#adef-data).
*
- * Note: the attribute name should only contain `A-Za-z0-9_-` chars,
- * double hyphens `--` are not accepted by WordPress.
+ * Note: the attribute name should only contain `A-Za-z0-9_-` chars.
*/
if ( str_starts_with( $name_low, 'data-' ) && ! empty( $allowed_attr['data-*'] )
- && preg_match( '/^data(?:-[a-z0-9_]+)+$/', $name_low, $match )
+ && preg_match( '/^data-[a-z0-9_-]+$/', $name_low, $match )
) {
/*
* Add the whole attribute name to the allowed attributes and set any restrictions
@@ -2147,7 +2207,7 @@ function wp_filter_global_styles_post( $data ) {
) {
unset( $decoded_data['isGlobalStylesUserThemeJSON'] );
- $data_to_encode = WP_Theme_JSON::remove_insecure_properties( $decoded_data );
+ $data_to_encode = WP_Theme_JSON::remove_insecure_properties( $decoded_data, 'custom' );
$data_to_encode['isGlobalStylesUserThemeJSON'] = true;
return wp_slash( wp_json_encode( $data_to_encode ) );
@@ -2304,6 +2364,7 @@ function kses_init() {
* Added support for `box-shadow`.
* @since 6.4.0 Added support for `writing-mode`.
* @since 6.5.0 Added support for `background-repeat`.
+ * @since 6.6.0 Added support for `grid-column`, `grid-row`, and `container-type`.
*
* @param string $css A string of CSS rules.
* @param string $deprecated Not used.
@@ -2441,11 +2502,13 @@ function safecss_filter_attr( $css, $deprecated = '' ) {
'grid-auto-columns',
'grid-column-start',
'grid-column-end',
+ 'grid-column',
'grid-column-gap',
'grid-template-rows',
'grid-auto-rows',
'grid-row-start',
'grid-row-end',
+ 'grid-row',
'grid-row-gap',
'grid-gap',
@@ -2475,6 +2538,7 @@ function safecss_filter_attr( $css, $deprecated = '' ) {
'z-index',
'box-shadow',
'aspect-ratio',
+ 'container-type',
// Custom CSS properties.
'--*',