1 files changed, 599 insertions, 113 deletions
diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php
index f27f83b..c76cc19 100644
--- a/wp-includes/html-api/class-wp-html-processor.php
+++ b/wp-includes/html-api/class-wp-html-processor.php
@@ -99,12 +99,20 @@
  *
  * The following list specifies the HTML tags that _are_ supported:
  *
+ *  - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
+ *  - Custom elements: All custom elements are supported. :)
+ *  - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH.
+ *  - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
+ *  - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
  *  - Links: A.
- *  - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
- *  - Containers: DIV, FIGCAPTION, FIGURE, SPAN.
- *  - Form elements: BUTTON.
- *  - Paragraph: P.
- *  - Void elements: IMG.
+ *  - Lists: DD, DL, DT, LI, OL, UL.
+ *  - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO.
+ *  - Paragraph: BR, P.
+ *  - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
+ *  - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
+ *  - Templating elements: SLOT.
+ *  - Text decoration: RUBY.
+ *  - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER.
  *
  * ### Supported markup
  *
@@ -142,17 +150,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	const MAX_BOOKMARKS = 100;
 
 	/**
-	 * Static query for instructing the Tag Processor to visit every token.
-	 *
-	 * @access private
-	 *
-	 * @since 6.4.0
-	 *
-	 * @var array
-	 */
-	const VISIT_EVERYTHING = array( 'tag_closers' => 'visit' );
-
-	/**
 	 * Holds the working state of the parser, including the stack of
 	 * open elements and the stack of active formatting elements.
 	 *
@@ -244,15 +241,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			return null;
 		}
 
-		$p                        = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE );
-		$p->state->context_node   = array( 'BODY', array() );
-		$p->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+		$processor                        = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE );
+		$processor->state->context_node   = array( 'BODY', array() );
+		$processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
 
-		// @TODO: Create "fake" bookmarks for non-existent but implied nodes.
-		$p->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
-		$p->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
+		// @todo Create "fake" bookmarks for non-existent but implied nodes.
+		$processor->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
+		$processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
 
-		$p->state->stack_of_open_elements->push(
+		$processor->state->stack_of_open_elements->push(
 			new WP_HTML_Token(
 				'root-node',
 				'HTML',
@@ -260,15 +257,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			)
 		);
 
-		$p->state->stack_of_open_elements->push(
+		$processor->state->stack_of_open_elements->push(
 			new WP_HTML_Token(
 				'context-node',
-				$p->state->context_node[0],
+				$processor->state->context_node[0],
 				false
 			)
 		);
 
-		return $p;
+		return $processor;
 	}
 
 	/**
@@ -342,7 +339,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	/**
 	 * Finds the next tag matching the $query.
 	 *
-	 * @TODO: Support matching the class name and tag name.
+	 * @todo Support matching the class name and tag name.
 	 *
 	 * @since 6.4.0
 	 *
@@ -364,6 +361,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	public function next_tag( $query = null ) {
 		if ( null === $query ) {
 			while ( $this->step() ) {
+				if ( '#tag' !== $this->get_token_type() ) {
+					continue;
+				}
+
 				if ( ! $this->is_tag_closer() ) {
 					return true;
 				}
@@ -387,6 +388,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 
 		if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) {
 			while ( $this->step() ) {
+				if ( '#tag' !== $this->get_token_type() ) {
+					continue;
+				}
+
 				if ( ! $this->is_tag_closer() ) {
 					return true;
 				}
@@ -408,6 +413,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 		$match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;
 
 		while ( $match_offset > 0 && $this->step() ) {
+			if ( '#tag' !== $this->get_token_type() ) {
+				continue;
+			}
+
 			if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) {
 				return true;
 			}
@@ -417,6 +426,24 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	}
 
 	/**
+	 * Ensures internal accounting is maintained for HTML semantic rules while
+	 * the underlying Tag Processor class is seeking to a bookmark.
+	 *
+	 * This doesn't currently have a way to represent non-tags and doesn't process
+	 * semantic rules for text nodes. For access to the raw tokens consider using
+	 * WP_HTML_Tag_Processor instead.
+	 *
+	 * @since 6.5.0 Added for internal support; do not use.
+	 *
+	 * @access private
+	 *
+	 * @return bool
+	 */
+	public function next_token() {
+		return $this->step();
+	}
+
+	/**
 	 * Indicates if the currently-matched tag matches the given breadcrumbs.
 	 *
 	 * A "*" represents a single tag wildcard, where any tag matches, but not no tags.
@@ -442,10 +469,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 * @return bool Whether the currently-matched tag is found at the given nested structure.
 	 */
 	public function matches_breadcrumbs( $breadcrumbs ) {
-		if ( ! $this->get_tag() ) {
-			return false;
-		}
-
 		// Everything matches when there are zero constraints.
 		if ( 0 === count( $breadcrumbs ) ) {
 			return true;
@@ -492,7 +515,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			return false;
 		}
 
-		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+		if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
 			/*
 			 * Void elements still hop onto the stack of open elements even though
 			 * there's no corresponding closing tag. This is important for managing
@@ -502,28 +525,42 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			 * When moving on to the next node, therefore, if the bottom-most element
 			 * on the stack is a void element, it must be closed.
 			 *
-			 * @TODO: Once self-closing foreign elements and BGSOUND are supported,
+			 * @todo Once self-closing foreign elements and BGSOUND are supported,
 			 *        they must also be implicitly closed here too. BGSOUND is
 			 *        special since it's only self-closing if the self-closing flag
 			 *        is provided in the opening tag, otherwise it expects a tag closer.
 			 */
 			$top_node = $this->state->stack_of_open_elements->current_node();
-			if ( $top_node && self::is_void( $top_node->node_name ) ) {
+			if (
+				$top_node && (
+					// Void elements.
+					self::is_void( $top_node->node_name ) ||
+					// Comments, text nodes, and other atomic tokens.
+					'#' === $top_node->node_name[0] ||
+					// Doctype declarations.
+					'html' === $top_node->node_name
+				)
+			) {
 				$this->state->stack_of_open_elements->pop();
 			}
+		}
 
-			parent::next_tag( self::VISIT_EVERYTHING );
+		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+			parent::next_token();
 		}
 
 		// Finish stepping when there are no more tokens in the document.
-		if ( null === $this->get_tag() ) {
+		if (
+			WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+			WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+		) {
 			return false;
 		}
 
 		$this->state->current_token = new WP_HTML_Token(
-			$this->bookmark_tag(),
-			$this->get_tag(),
-			$this->is_tag_closer(),
+			$this->bookmark_token(),
+			$this->get_token_name(),
+			$this->has_self_closing_flag(),
 			$this->release_internal_bookmark_on_destruct
 		);
 
@@ -551,9 +588,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 * Breadcrumbs start at the outermost parent and descend toward the matched element.
 	 * They always include the entire path from the root HTML node to the matched element.
 	 *
-	 * @TODO: It could be more efficient to expose a generator-based version of this function
-	 *        to avoid creating the array copy on tag iteration. If this is done, it would likely
-	 *        be more useful to walk up the stack when yielding instead of starting at the top.
+	 * @todo It could be more efficient to expose a generator-based version of this function
+	 *       to avoid creating the array copy on tag iteration. If this is done, it would likely
+	 *       be more useful to walk up the stack when yielding instead of starting at the top.
 	 *
 	 * Example
 	 *
@@ -566,10 +603,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
 	 */
 	public function get_breadcrumbs() {
-		if ( ! $this->get_tag() ) {
-			return null;
-		}
-
 		$breadcrumbs = array();
 		foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) {
 			$breadcrumbs[] = $stack_item->node_name;
@@ -594,17 +627,67 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_in_body() {
-		$tag_name = $this->get_tag();
-		$op_sigil = $this->is_tag_closer() ? '-' : '+';
-		$op       = "{$op_sigil}{$tag_name}";
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
 
 		switch ( $op ) {
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			case '#text':
+				$this->reconstruct_active_formatting_elements();
+
+				$current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ];
+
+				/*
+				 * > A character token that is U+0000 NULL
+				 *
+				 * Any successive sequence of NULL bytes is ignored and won't
+				 * trigger active format reconstruction. Therefore, if the text
+				 * only comprises NULL bytes then the token should be ignored
+				 * here, but if there are any other characters in the stream
+				 * the active formats should be reconstructed.
+				 */
+				if (
+					1 <= $current_token->length &&
+					"\x00" === $this->html[ $current_token->start ] &&
+					strspn( $this->html, "\x00", $current_token->start, $current_token->length ) === $current_token->length
+				) {
+					// Parse error: ignore the token.
+					return $this->step();
+				}
+
+				/*
+				 * Whitespace-only text does not affect the frameset-ok flag.
+				 * It is probably inter-element whitespace, but it may also
+				 * contain character references which decode only to whitespace.
+				 */
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) {
+					$this->state->frameset_ok = false;
+				}
+
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			case 'html':
+				/*
+				 * > A DOCTYPE token
+				 * > Parse error. Ignore the token.
+				 */
+				return $this->step();
+
 			/*
 			 * > A start tag whose tag name is "button"
 			 */
 			case '+BUTTON':
 				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) {
-					// @TODO: Indicate a parse error once it's possible. This error does not impact the logic here.
+					// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
 					$this->generate_implied_end_tags();
 					$this->state->stack_of_open_elements->pop_until( 'BUTTON' );
 				}
@@ -621,11 +704,31 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			 * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup",
 			 * > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul"
 			 */
+			case '+ADDRESS':
+			case '+ARTICLE':
+			case '+ASIDE':
 			case '+BLOCKQUOTE':
+			case '+CENTER':
+			case '+DETAILS':
+			case '+DIALOG':
+			case '+DIR':
 			case '+DIV':
+			case '+DL':
+			case '+FIELDSET':
 			case '+FIGCAPTION':
 			case '+FIGURE':
+			case '+FOOTER':
+			case '+HEADER':
+			case '+HGROUP':
+			case '+MAIN':
+			case '+MENU':
+			case '+NAV':
+			case '+OL':
 			case '+P':
+			case '+SEARCH':
+			case '+SECTION':
+			case '+SUMMARY':
+			case '+UL':
 				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
 					$this->close_a_p_element();
 				}
@@ -639,22 +742,213 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 			 * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
 			 * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
 			 */
+			case '-ADDRESS':
+			case '-ARTICLE':
+			case '-ASIDE':
 			case '-BLOCKQUOTE':
 			case '-BUTTON':
+			case '-CENTER':
+			case '-DETAILS':
+			case '-DIALOG':
+			case '-DIR':
 			case '-DIV':
+			case '-DL':
+			case '-FIELDSET':
 			case '-FIGCAPTION':
 			case '-FIGURE':
-				if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
-					// @TODO: Report parse error.
+			case '-FOOTER':
+			case '-HEADER':
+			case '-HGROUP':
+			case '-LISTING':
+			case '-MAIN':
+			case '-MENU':
+			case '-NAV':
+			case '-OL':
+			case '-PRE':
+			case '-SEARCH':
+			case '-SECTION':
+			case '-SUMMARY':
+			case '-UL':
+				if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) {
+					// @todo Report parse error.
 					// Ignore the token.
 					return $this->step();
 				}
 
 				$this->generate_implied_end_tags();
-				if ( $this->state->stack_of_open_elements->current_node()->node_name !== $tag_name ) {
-					// @TODO: Record parse error: this error doesn't impact parsing.
+				if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) {
+					// @todo Record parse error: this error doesn't impact parsing.
+				}
+				$this->state->stack_of_open_elements->pop_until( $token_name );
+				return true;
+
+			/*
+			 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
+			 */
+			case '+H1':
+			case '+H2':
+			case '+H3':
+			case '+H4':
+			case '+H5':
+			case '+H6':
+				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+					$this->close_a_p_element();
+				}
+
+				if (
+					in_array(
+						$this->state->stack_of_open_elements->current_node()->node_name,
+						array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ),
+						true
+					)
+				) {
+					// @todo Indicate a parse error once it's possible.
+					$this->state->stack_of_open_elements->pop();
+				}
+
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > A start tag whose tag name is one of: "pre", "listing"
+			 */
+			case '+PRE':
+			case '+LISTING':
+				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+					$this->close_a_p_element();
+				}
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->frameset_ok = false;
+				return true;
+
+			/*
+			 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
+			 */
+			case '-H1':
+			case '-H2':
+			case '-H3':
+			case '-H4':
+			case '-H5':
+			case '-H6':
+				if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
+					/*
+					 * This is a parse error; ignore the token.
+					 *
+					 * @todo Indicate a parse error once it's possible.
+					 */
+					return $this->step();
+				}
+
+				$this->generate_implied_end_tags();
+
+				if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) {
+					// @todo Record parse error: this error doesn't impact parsing.
 				}
-				$this->state->stack_of_open_elements->pop_until( $tag_name );
+
+				$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
+				return true;
+
+			/*
+			 * > A start tag whose tag name is "li"
+			 * > A start tag whose tag name is one of: "dd", "dt"
+			 */
+			case '+DD':
+			case '+DT':
+			case '+LI':
+				$this->state->frameset_ok = false;
+				$node                     = $this->state->stack_of_open_elements->current_node();
+				$is_li                    = 'LI' === $token_name;
+
+				in_body_list_loop:
+				/*
+				 * The logic for LI and DT/DD is the same except for one point: LI elements _only_
+				 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
+				 */
+				if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
+					$node_name = $is_li ? 'LI' : $node->node_name;
+					$this->generate_implied_end_tags( $node_name );
+					if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
+						// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
+					}
+
+					$this->state->stack_of_open_elements->pop_until( $node_name );
+					goto in_body_list_done;
+				}
+
+				if (
+					'ADDRESS' !== $node->node_name &&
+					'DIV' !== $node->node_name &&
+					'P' !== $node->node_name &&
+					$this->is_special( $node->node_name )
+				) {
+					/*
+					 * > If node is in the special category, but is not an address, div,
+					 * > or p element, then jump to the step labeled done below.
+					 */
+					goto in_body_list_done;
+				} else {
+					/*
+					 * > Otherwise, set node to the previous entry in the stack of open elements
+					 * > and return to the step labeled loop.
+					 */
+					foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
+						$node = $item;
+						break;
+					}
+					goto in_body_list_loop;
+				}
+
+				in_body_list_done:
+				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+					$this->close_a_p_element();
+				}
+
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > An end tag whose tag name is "li"
+			 * > An end tag whose tag name is one of: "dd", "dt"
+			 */
+			case '-DD':
+			case '-DT':
+			case '-LI':
+				if (
+					/*
+					 * An end tag whose tag name is "li":
+					 * If the stack of open elements does not have an li element in list item scope,
+					 * then this is a parse error; ignore the token.
+					 */
+					(
+						'LI' === $token_name &&
+						! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
+					) ||
+					/*
+					 * An end tag whose tag name is one of: "dd", "dt":
+					 * If the stack of open elements does not have an element in scope that is an
+					 * HTML element with the same tag name as that of the token, then this is a
+					 * parse error; ignore the token.
+					 */
+					(
+						'LI' !== $token_name &&
+						! $this->state->stack_of_open_elements->has_element_in_scope( $token_name )
+					)
+				) {
+					/*
+					 * This is a parse error, ignore the token.
+					 *
+					 * @todo Indicate a parse error once it's possible.
+					 */
+					return $this->step();
+				}
+
+				$this->generate_implied_end_tags( $token_name );
+
+				if ( $token_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
+					// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
+				}
+
+				$this->state->stack_of_open_elements->pop_until( $token_name );
 				return true;
 
 			/*
@@ -730,47 +1024,174 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 				return true;
 
 			/*
+			 * > An end tag whose tag name is "br"
+			 * >   Parse error. Drop the attributes from the token, and act as described in the next
+			 * >   entry; i.e. act as if this was a "br" start tag token with no attributes, rather
+			 * >   than the end tag token that it actually is.
+			 */
+			case '-BR':
+				$this->last_error = self::ERROR_UNSUPPORTED;
+				throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
+
+			/*
 			 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
 			 */
+			case '+AREA':
+			case '+BR':
+			case '+EMBED':
 			case '+IMG':
+			case '+KEYGEN':
+			case '+WBR':
 				$this->reconstruct_active_formatting_elements();
 				$this->insert_html_element( $this->state->current_token );
+				$this->state->frameset_ok = false;
 				return true;
 
 			/*
-			 * > Any other start tag
+			 * > A start tag whose tag name is "input"
 			 */
-			case '+SPAN':
+			case '+INPUT':
 				$this->reconstruct_active_formatting_elements();
 				$this->insert_html_element( $this->state->current_token );
+				$type_attribute = $this->get_attribute( 'type' );
+				/*
+				 * > If the token does not have an attribute with the name "type", or if it does,
+				 * > but that attribute's value is not an ASCII case-insensitive match for the
+				 * > string "hidden", then: set the frameset-ok flag to "not ok".
+				 */
+				if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
+					$this->state->frameset_ok = false;
+				}
 				return true;
 
 			/*
-			 * Any other end tag
+			 * > A start tag whose tag name is "hr"
 			 */
-			case '-SPAN':
-				foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
-					// > If node is an HTML element with the same tag name as the token, then:
-					if ( $item->node_name === $tag_name ) {
-						$this->generate_implied_end_tags( $tag_name );
+			case '+HR':
+				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+					$this->close_a_p_element();
+				}
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->frameset_ok = false;
+				return true;
 
-						// > If node is not the current node, then this is a parse error.
+			/*
+			 * > A start tag whose tag name is one of: "param", "source", "track"
+			 */
+			case '+PARAM':
+			case '+SOURCE':
+			case '+TRACK':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+		}
 
-						$this->state->stack_of_open_elements->pop_until( $tag_name );
-						return true;
-					}
+		/*
+		 * These tags require special handling in the 'in body' insertion mode
+		 * but that handling hasn't yet been implemented.
+		 *
+		 * As the rules for each tag are implemented, the corresponding tag
+		 * name should be removed from this list. An accompanying test should
+		 * help ensure this list is maintained.
+		 *
+		 * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags
+		 *
+		 * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's
+		 * possible to handle "any other start tag" and "any other end tag" below,
+		 * as that guarantees execution doesn't proceed for the unimplemented tags.
+		 *
+		 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
+		 */
+		switch ( $token_name ) {
+			case 'APPLET':
+			case 'BASE':
+			case 'BASEFONT':
+			case 'BGSOUND':
+			case 'BODY':
+			case 'CAPTION':
+			case 'COL':
+			case 'COLGROUP':
+			case 'FORM':
+			case 'FRAME':
+			case 'FRAMESET':
+			case 'HEAD':
+			case 'HTML':
+			case 'IFRAME':
+			case 'LINK':
+			case 'MARQUEE':
+			case 'MATH':
+			case 'META':
+			case 'NOBR':
+			case 'NOEMBED':
+			case 'NOFRAMES':
+			case 'NOSCRIPT':
+			case 'OBJECT':
+			case 'OPTGROUP':
+			case 'OPTION':
+			case 'PLAINTEXT':
+			case 'RB':
+			case 'RP':
+			case 'RT':
+			case 'RTC':
+			case 'SARCASM':
+			case 'SCRIPT':
+			case 'SELECT':
+			case 'STYLE':
+			case 'SVG':
+			case 'TABLE':
+			case 'TBODY':
+			case 'TD':
+			case 'TEMPLATE':
+			case 'TEXTAREA':
+			case 'TFOOT':
+			case 'TH':
+			case 'THEAD':
+			case 'TITLE':
+			case 'TR':
+			case 'XMP':
+				$this->last_error = self::ERROR_UNSUPPORTED;
+				throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." );
+		}
 
-					// > Otherwise, if node is in the special category, then this is a parse error; ignore the token, and return.
-					if ( self::is_special( $item->node_name ) ) {
-						return $this->step();
-					}
+		if ( ! $this->is_tag_closer() ) {
+			/*
+			 * > Any other start tag
+			 */
+			$this->reconstruct_active_formatting_elements();
+			$this->insert_html_element( $this->state->current_token );
+			return true;
+		} else {
+			/*
+			 * > Any other end tag
+			 */
+
+			/*
+			 * Find the corresponding tag opener in the stack of open elements, if
+			 * it exists before reaching a special element, which provides a kind
+			 * of boundary in the stack. For example, a `</custom-tag>` should not
+			 * close anything beyond its containing `P` or `DIV` element.
+			 */
+			foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
+				if ( $token_name === $node->node_name ) {
+					break;
 				}
-				// Execution should not reach here; if it does then something went wrong.
-				return false;
 
-			default:
-				$this->last_error = self::ERROR_UNSUPPORTED;
-				throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
+				if ( self::is_special( $node->node_name ) ) {
+					// This is a parse error, ignore the token.
+					return $this->step();
+				}
+			}
+
+			$this->generate_implied_end_tags( $token_name );
+			if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
+				// @todo Record parse error: this error doesn't impact parsing.
+			}
+
+			foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
+				$this->state->stack_of_open_elements->pop();
+				if ( $node === $item ) {
+					return true;
+				}
+			}
 		}
 	}
 
@@ -779,19 +1200,16 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 
 	/**
-	 * Creates a new bookmark for the currently-matched tag and returns the generated name.
+	 * Creates a new bookmark for the currently-matched token and returns the generated name.
 	 *
 	 * @since 6.4.0
+	 * @since 6.5.0 Renamed from bookmark_tag() to bookmark_token().
 	 *
 	 * @throws Exception When unable to allocate requested bookmark.
 	 *
 	 * @return string|false Name of created bookmark, or false if unable to create.
 	 */
-	private function bookmark_tag() {
-		if ( ! $this->get_tag() ) {
-			return false;
-		}
-
+	private function bookmark_token() {
 		if ( ! parent::set_bookmark( ++$this->bookmark_counter ) ) {
 			$this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS;
 			throw new Exception( 'could not allocate bookmark' );
@@ -863,6 +1281,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	/**
 	 * Moves the internal cursor in the HTML Processor to a given bookmark's location.
 	 *
+	 * Be careful! Seeking backwards to a previous location resets the parser to the
+	 * start of the document and reparses the entire contents up until it finds the
+	 * sought-after bookmarked location.
+	 *
 	 * In order to prevent accidental infinite loops, there's a
 	 * maximum limit on the number of times seek() can be called.
 	 *
@@ -874,6 +1296,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
 	 */
 	public function seek( $bookmark_name ) {
+		// Flush any pending updates to the document before beginning.
+		$this->get_updated_html();
+
 		$actual_bookmark_name = "_{$bookmark_name}";
 		$processor_started_at = $this->state->current_token
 			? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
@@ -881,44 +1306,73 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 		$bookmark_starts_at   = $this->bookmarks[ $actual_bookmark_name ]->start;
 		$direction            = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';
 
-		switch ( $direction ) {
-			case 'forward':
-				// When moving forwards, re-parse the document until reaching the same location as the original bookmark.
-				while ( $this->step() ) {
-					if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
-						return true;
-					}
+		/*
+		 * If seeking backwards, it's possible that the sought-after bookmark exists within an element
+		 * which has been closed before the current cursor; in other words, it has already been removed
+		 * from the stack of open elements. This means that it's insufficient to simply pop off elements
+		 * from the stack of open elements which appear after the bookmarked location and then jump to
+		 * that location, as the elements which were open before won't be re-opened.
+		 *
+		 * In order to maintain consistency, the HTML Processor rewinds to the start of the document
+		 * and reparses everything until it finds the sought-after bookmark.
+		 *
+		 * There are potentially better ways to do this: cache the parser state for each bookmark and
+		 * restore it when seeking; store an immutable and idempotent register of where elements open
+		 * and close.
+		 *
+		 * If caching the parser state it will be essential to properly maintain the cached stack of
+		 * open elements and active formatting elements when modifying the document. This could be a
+		 * tedious and time-consuming process as well, and so for now will not be performed.
+		 *
+		 * It may be possible to track bookmarks for where elements open and close, and in doing so
+		 * be able to quickly recalculate breadcrumbs for any element in the document. It may even
+		 * be possible to remove the stack of open elements and compute it on the fly this way.
+		 * If doing this, the parser would need to track the opening and closing locations for all
+		 * tokens in the breadcrumb path for any and all bookmarks. By utilizing bookmarks themselves
+		 * this list could be automatically maintained while modifying the document. Finding the
+		 * breadcrumbs would then amount to traversing that list from the start until the token
+		 * being inspected. Once an element closes, if there are no bookmarks pointing to locations
+		 * within that element, then all of these locations may be forgotten to save on memory use
+		 * and computation time.
+		 */
+		if ( 'backward' === $direction ) {
+			/*
+			 * Instead of clearing the parser state and starting fresh, calling the stack methods
+			 * maintains the proper flags in the parser.
+			 */
+			foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
+				if ( 'context-node' === $item->bookmark_name ) {
+					break;
 				}
 
-				return false;
-
-			case 'backward':
-				/*
-				 * When moving backwards, clear out all existing stack entries which appear after the destination
-				 * bookmark. These could be stored for later retrieval, but doing so would require additional
-				 * memory overhead and also demand that references and bookmarks are updated as the document
-				 * changes. In time this could be a valuable optimization, but it's okay to give up that
-				 * optimization in exchange for more CPU time to recompute the stack, to re-parse the
-				 * document that may have already been parsed once.
-				 */
-				foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
-					if ( $bookmark_starts_at >= $this->bookmarks[ $item->bookmark_name ]->start ) {
-						break;
-					}
+				$this->state->stack_of_open_elements->remove_node( $item );
+			}
 
-					$this->state->stack_of_open_elements->remove_node( $item );
+			foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
+				if ( 'context-node' === $item->bookmark_name ) {
+					break;
 				}
 
-				foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
-					if ( $bookmark_starts_at >= $this->bookmarks[ $item->bookmark_name ]->start ) {
-						break;
-					}
+				$this->state->active_formatting_elements->remove_node( $item );
+			}
 
-					$this->state->active_formatting_elements->remove_node( $item );
-				}
+			parent::seek( 'context-node' );
+			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+			$this->state->frameset_ok    = true;
+		}
 
-				return parent::seek( $actual_bookmark_name );
+		// When moving forwards, reparse the document until reaching the same location as the original bookmark.
+		if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
+			return true;
 		}
+
+		while ( $this->step() ) {
+			if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
+				return true;
+			}
+		}
+
+		return false;
 	}
 
 	/**
@@ -1005,6 +1459,18 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 		return parent::set_bookmark( "_{$bookmark_name}" );
 	}
 
+	/**
+	 * Checks whether a bookmark with the given name exists.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
+	 * @return bool Whether that bookmark exists.
+	 */
+	public function has_bookmark( $bookmark_name ) {
+		return parent::has_bookmark( "_{$bookmark_name}" );
+	}
+
 	/*
 	 * HTML Parsing Algorithms
 	 */
@@ -1034,6 +1500,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 	private function generate_implied_end_tags( $except_for_this_element = null ) {
 		$elements_with_implied_end_tags = array(
+			'DD',
+			'DT',
+			'LI',
 			'P',
 		);
 
@@ -1059,6 +1528,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 	private function generate_implied_end_tags_thoroughly() {
 		$elements_with_implied_end_tags = array(
+			'DD',
+			'DT',
+			'LI',
 			'P',
 		);
 
@@ -1170,7 +1642,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 
 			// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
 			if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
-				$this->state->active_formatting_elements->remove_node( $formatting_element->bookmark_name );
+				$this->state->active_formatting_elements->remove_node( $formatting_element );
 				return;
 			}
 
@@ -1373,14 +1845,19 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 		return (
 			'AREA' === $tag_name ||
 			'BASE' === $tag_name ||
+			'BASEFONT' === $tag_name || // Obsolete but still treated as void.
+			'BGSOUND' === $tag_name || // Obsolete but still treated as void.
 			'BR' === $tag_name ||
 			'COL' === $tag_name ||
 			'EMBED' === $tag_name ||
+			'FRAME' === $tag_name ||
 			'HR' === $tag_name ||
 			'IMG' === $tag_name ||
 			'INPUT' === $tag_name ||
+			'KEYGEN' === $tag_name || // Obsolete but still treated as void.
 			'LINK' === $tag_name ||
 			'META' === $tag_name ||
+			'PARAM' === $tag_name || // Obsolete but still treated as void.
 			'SOURCE' === $tag_name ||
 			'TRACK' === $tag_name ||
 			'WBR' === $tag_name
@@ -1410,6 +1887,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	const REPROCESS_CURRENT_NODE = 'reprocess-current-node';
 
 	/**
+	 * Indicates that the current HTML token should be processed without advancing the parser.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @var string
+	 */
+	const PROCESS_CURRENT_NODE = 'process-current-node';
+
+	/**
 	 * Indicates that the parser encountered unsupported markup and has bailed.
 	 *
 	 * @since 6.4.0