From 305a91d7368d06f892a927bdcc7e830a4036a78a Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 10 Jan 2024 22:24:08 -0500 Subject: [PATCH 1/4] HTML API: Introduce HTML Template Renderer --- .../html-api/class-wp-html-tag-processor.php | 4 +- .../html-api/class-wp-html-template.php | 189 ++++++++++++++++++ src/wp-includes/html-api/class-wp-html.php | 84 ++++++++ src/wp-settings.php | 2 + .../phpunit/tests/html-api/wpHtmlTemplate.php | 106 ++++++++++ 5 files changed, 383 insertions(+), 2 deletions(-) create mode 100644 src/wp-includes/html-api/class-wp-html-template.php create mode 100644 src/wp-includes/html-api/class-wp-html.php create mode 100644 tests/phpunit/tests/html-api/wpHtmlTemplate.php diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 169fabe750fcf..e0c8df835114c 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2038,8 +2038,8 @@ private function after_tag() { $this->token_length = null; $this->tag_name_starts_at = null; $this->tag_name_length = null; - $this->text_starts_at = 0; - $this->text_length = 0; + $this->text_starts_at = null; + $this->text_length = null; $this->is_closing_tag = null; $this->attributes = array(); $this->comment_type = null; diff --git a/src/wp-includes/html-api/class-wp-html-template.php b/src/wp-includes/html-api/class-wp-html-template.php new file mode 100644 index 0000000000000..3ad279bf559e1 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-template.php @@ -0,0 +1,189 @@ +">', + * array( + * 'profile_url' => 'https://profiles.example.com/username', + * 'name' => $user->display_name + * ) + * ); + * // Outputs: Bobby Tables + * + * Do not escape the values supplied to the argument array! This function will escape each + * parameter's value as needed and additional manual escaping may lead to incorrect output. + * + * ## Syntax. + * + * ### Substitution Placeholders. + * + * - `` finds `named_arg` in the arguments array, escapes its value if possible, + * and replaces the placeholder with the escaped value. These may exist inside double-quoted + * HTML tag attributes or in HTML text content between tags. They cannot be used to output a tag + * name or content inside a comment. + * + * ### Spread Attributes. + * + * - `...named_arg` when found within an HTML tag will lookup `named_arg` in the arguments array + * and, if it's an array, will set the attribute on the tag for each key/value pair whose value + * is a string, boolean, or `null`. + * + * ## Notes. + * + * - Attributes may only be supplied for a limited set of types: a string value assigns a double-quoted + * attribute value; `true` sets the attribute as a boolean attribute; `null` removes the attribute. + * If provided any other type of value the attribute will be ignored and its existing value persists. + * + * - If multiple HTML attributes are specified for a given tag they will be applied as if calling + * `set_attribute()` in the order they are specified in the template. This includes any attributes + * assigned through the attribute spread syntax. + * + * - Substitutions in text nodes may only contain string values. If provided any other type of value + * the placeholder will be removed with nothing in its place. + * + * - This function currently escapes all value provided in the arguments array. In the future + * it may provide the ability to nest pre-rendered HTML into the template, but this functionality + * is deferred for a future update. + * + * - This function will not replace content inside of SCRIPT, or STYLE elements. + * + * @since 6.5.0 + * + * @access private + * + * @param string $template The HTML template. + * @param string $args Array of key/value pairs providing substitue values for the placeholders. + * @return string The rendered HTML. + */ + public static function render( $template, $args = array() ) { + $processor = new self( $template ); + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + $text = $processor->get_modifiable_text(); + + // Replace placeholders that are found inside #text nodes. + if ( '#funky-comment' === $type && strlen( $text ) > 0 && '%' === $text[0] ) { + $name = substr( $text, 1 ); + $value = isset( $args[ $name ] ) && is_string( $args[ $name ] ) ? $args[ $name ] : null; + $processor->set_bookmark( 'here' ); + $processor->lexical_updates[] = new WP_HTML_Text_Replacement( + $processor->bookmarks['here']->start, + $processor->bookmarks['here']->length, + null === $value ? '' : esc_html( $value ) + ); + continue; + } + + // For every tag, scan the attributes to look for placeholders. + if ( '#tag' === $type ) { + foreach ( $processor->get_attribute_names_with_prefix( '' ) ?? array() as $attribute_name ) { + if ( str_starts_with( $attribute_name, '...' ) ) { + $spread_name = substr( $attribute_name, 3 ); + if ( isset( $args[ $spread_name ] ) && is_array( $args[ $spread_name ] ) ) { + foreach ( $args[ $spread_name ] as $key => $value ) { + if ( true === $value || null === $value || is_string( $value ) ) { + $processor->set_attribute( $key, $value ); + } + } + } + $processor->remove_attribute( $attribute_name ); + } + + $value = $processor->get_attribute( $attribute_name ); + + if ( ! is_string( $value ) ) { + continue; + } + + // Replace entire attributes if their content is exclusively a placeholder, e.g. `title=""`. + $full_match = null; + if ( preg_match( '~^]+)>$~', $value, $full_match ) ) { + $name = $full_match[1]; + + if ( array_key_exists( $name, $args ) ) { + $value = $args[ $name ]; + if ( null === $value ) { + $processor->remove_attribute( $attribute_name ); + } elseif ( true === $value ) { + $processor->set_attribute( $attribute_name, true ); + } elseif ( is_string( $value ) ) { + $processor->set_attribute( $attribute_name, $args[ $name ] ); + } else { + $processor->remove_attribute( $attribute_name ); + } + } else { + $processor->remove_attribute( $attribute_name ); + } + + continue; + } + + // Replace placeholders embedded in otherwise-static attribute values, e.g. `title="Post: "`. + $new_value = preg_replace_callback( + '~]+)>~', + static function ( $matches ) use ( $args ) { + return is_string( $args[ $matches[1] ] ) + ? esc_attr( $args[ $matches[1] ] ) + : ''; + }, + $value + ); + + if ( $new_value !== $value ) { + $processor->set_attribute( $attribute_name, $new_value ); + } + } + + // Update TEXTAREA and TITLE contents. + $tag_name = $processor->get_tag(); + if ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) { + // Replace placeholders inside these RCDATA tags. + $new_text = preg_replace_callback( + '~]+)>~', + static function ( $matches ) use ( $args ) { + return is_string( $args[ $matches[1] ] ) + ? $args[ $matches[1] ] + : ''; + }, + $text + ); + + if ( $new_text !== $text ) { + $processor->set_modifiable_text( $new_text ); + } + } + } + } + + return $processor->get_updated_html(); + } +} diff --git a/src/wp-includes/html-api/class-wp-html.php b/src/wp-includes/html-api/class-wp-html.php new file mode 100644 index 0000000000000..606296d2a9c7c --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html.php @@ -0,0 +1,84 @@ +">', + * array( + * 'profile_url' => 'https://profiles.example.com/username', + * 'name' => $user->display_name + * ) + * ); + * // Outputs: Bobby Tables + * + * Do not escape the values supplied to the argument array! This function will escape each + * parameter's value as needed and additional manual escaping may lead to incorrect output. + * + * ## Syntax. + * + * ### Substitution Placeholders. + * + * - `` finds `named_arg` in the arguments array, escapes its value if possible, + * and replaces the placeholder with the escaped value. These may exist inside double-quoted + * HTML tag attributes or in HTML text content between tags. They cannot be used to output a tag + * name or content inside a comment. + * + * ### Spread Attributes. + * + * - `...named_arg` when found within an HTML tag will lookup `named_arg` in the arguments array + * and, if it's an array, will set the attribute on the tag for each key/value pair whose value + * is a string. The + * + * ## Notes. + * + * - Attributes may only be supplied for a limited set of types: a string value assigns a double-quoted + * attribute value; `true` sets the attribute as a boolean attribute; `null` removes the attribute. + * If provided any other type of value the attribute will be ignored and its existing value persists. + * + * - If multiple HTML attributes are specified for a given tag they will be applied as if calling + * `set_attribute()` in the order they are specified in the template. This includes any attributes + * assigned through the attribute spread syntax. + * + * - Substitutions in text nodes may only contain string values. If provided any other type of value + * the placeholder will be removed with nothing in its place. + * + * - This function currently escapes all value provided in the arguments array. In the future + * it may provide the ability to nest pre-rendered HTML into the template, but this functionality + * is deferred for a future update. + * + * - This function will not replace content inside of SCRIPT, or STYLE elements. + * + * @since 6.5.0 + * + * @access private + * + * @param string $template The HTML template. + * @param string $args Array of key/value pairs providing substitue values for the placeholders. + * @return string The rendered HTML. + */ + public static function render( $template, $args ) { + return WP_HTML_Template::render( $template, $args ); + } +} diff --git a/src/wp-settings.php b/src/wp-settings.php index 893517a119824..8a79dd3185ec5 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -252,6 +252,8 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-token.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; +require ABSPATH . WPINC . '/html-api/class-wp-html-template.php'; +require ABSPATH . WPINC . '/html-api/class-wp-html.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; require ABSPATH . WPINC . '/class-wp-http-curl.php'; diff --git a/tests/phpunit/tests/html-api/wpHtmlTemplate.php b/tests/phpunit/tests/html-api/wpHtmlTemplate.php new file mode 100644 index 0000000000000..dd4859fb2f14b --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlTemplate.php @@ -0,0 +1,106 @@ +" ...div-args inert="">Just a test', + array( + 'count' => 'Hi <3', + 'class' => '5>4', + 'is_inert' => 'inert', + 'div-args' => array( + 'class' => 'hoover', + 'disabled' => true, + ), + ) + ); + + $this->assertSame( + '
Just a <strong>Hi <3</strong> test
', + $html, + 'Failed to properly render template.' + ); + } + + /** + * Ensures that basic attacks on attribute names and values are blocked. + * + * @ticket 60229 + * + * @covers WP_HTML::render + */ + public function test_cannot_break_out_of_tag_with_malicious_attribute_name() { + $html = WP_HTML_Template::render( + '
', + array( + 'class' => '">', + 'args' => array( + '"> double-quoted escape' => 'busted!', + '> tag escape' => 'busted!', + ), + ) + ); + + // The output here should include an escaped `class` attribute and no others, also no other tags. + $processor = new WP_HTML_Tag_Processor( $html ); + $processor->next_tag(); + + $this->assertSame( + 'DIV', + $processor->get_tag(), + "Expected to find DIV tag but found {$processor->get_tag()} instead." + ); + + $this->assertSame( + '">', + $processor->get_attribute( 'class' ), + 'Should have found escaped `class` attribute.' + ); + + $this->assertSame( + array( 'class' ), + $processor->get_attribute_names_with_prefix( '' ), + 'Should have set `class` attribute and no others.' + ); + + $this->assertFalse( + $processor->next_tag(), + "Should not have found any other tags but found {$processor->get_tag()} instead." + ); + } + + /** + * Ensures that basic replacement inside a TEXTAREA subtitutes placeholders. + * + * @ticket 60229 + */ + public function test_replaces_textarea_placeholders() { + $html = WP_HTML_Template::render( + '', + array( 'big' => ' ()' ) + ); + + $this->assertSame( + '', + $html, + 'Should have replaced placeholder with RCDATA escaping rules.' + ); + } +} From 24a2d64a2b7f8ea5f29e754c65688f44c6820c20 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 15 Jan 2024 16:56:05 -0600 Subject: [PATCH 2/4] Test refactor of wp_video_shortcut --- .../html-api/class-wp-html-tag-processor.php | 27 +++++++- .../html-api/class-wp-html-template.php | 4 +- src/wp-includes/media.php | 68 ++++++++++--------- 3 files changed, 64 insertions(+), 35 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index e0c8df835114c..fe4ac3cba3b64 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2899,14 +2899,37 @@ public function set_attribute( $name, $value ) { * > To represent a false value, the attribute has to be omitted altogether. * - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes */ - if ( false === $value ) { + if ( false === $value || null === $value ) { return $this->remove_attribute( $name ); } if ( true === $value ) { $updated_attribute = $name; } else { - $escaped_new_value = esc_attr( $value ); + $tag_name = $this->get_tag(); + $comparable_name = strtolower( $name ); + + /* + * Escape URL attributes. + * + * @see https://html.spec.whatwg.org/#attributes-3 + */ + if ( + ! str_starts_with( $value, 'data:' ) && ( + 'cite' === $comparable_name || + 'formaction' === $comparable_name || + 'href' === $comparable_name || + 'ping' === $comparable_name || + 'src' === $comparable_name || + ( 'FORM' === $tag_name && 'action' === $comparable_name ) || + ( 'OBJECT' === $tag_name && 'data' === $comparable_name ) || + ( 'VIDEO' === $tag_name && 'poster' === $comparable_name ) + ) + ) { + $escaped_new_value = esc_url( $value ); + } else { + $escaped_new_value = esc_attr( $value ); + } $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } diff --git a/src/wp-includes/html-api/class-wp-html-template.php b/src/wp-includes/html-api/class-wp-html-template.php index 3ad279bf559e1..14ee1b3c670e9 100644 --- a/src/wp-includes/html-api/class-wp-html-template.php +++ b/src/wp-includes/html-api/class-wp-html-template.php @@ -110,7 +110,7 @@ public static function render( $template, $args = array() ) { $spread_name = substr( $attribute_name, 3 ); if ( isset( $args[ $spread_name ] ) && is_array( $args[ $spread_name ] ) ) { foreach ( $args[ $spread_name ] as $key => $value ) { - if ( true === $value || null === $value || is_string( $value ) ) { + if ( true === $value || false === $value || null === $value || is_string( $value ) ) { $processor->set_attribute( $key, $value ); } } @@ -131,7 +131,7 @@ public static function render( $template, $args = array() ) { if ( array_key_exists( $name, $args ) ) { $value = $args[ $name ]; - if ( null === $value ) { + if ( false === $value || null === $value ) { $processor->remove_attribute( $attribute_name ); } elseif ( true === $value ) { $processor->set_attribute( $attribute_name, true ); diff --git a/src/wp-includes/media.php b/src/wp-includes/media.php index 38ec2213b7506..ecf4fd83b5cef 100644 --- a/src/wp-includes/media.php +++ b/src/wp-includes/media.php @@ -381,14 +381,10 @@ function set_post_thumbnail_size( $width = 0, $height = 0, $crop = false ) { * @return string HTML IMG element for given image attachment. */ function get_image_tag( $id, $alt, $title, $align, $size = 'medium' ) { - list( $img_src, $width, $height ) = image_downsize( $id, $size ); - $hwstring = image_hwstring( $width, $height ); - - $title = $title ? 'title="' . esc_attr( $title ) . '" ' : ''; $size_class = is_array( $size ) ? implode( 'x', $size ) : $size; - $class = 'align' . esc_attr( $align ) . ' size-' . esc_attr( $size_class ) . ' wp-image-' . $id; + $class = "align{$align} size-{$size_class} wp-image-{$id}"; /** * Filters the value of the attachment's image tag class attribute. @@ -403,7 +399,19 @@ function get_image_tag( $id, $alt, $title, $align, $size = 'medium' ) { */ $class = apply_filters( 'get_image_tag_class', $class, $id, $align, $size ); - $html = '' . esc_attr( $alt ) . ''; + $html = WP_HTML::render( + <<<'HTML' +</%alt> +HTML, + array( + 'alt' => $alt, + 'class' => $class, + 'height' => (string) $height, + 'src' => $img_src, + 'title' => empty( $title ) ? null : $title, + 'width' => (string) $width, + ) + ); /** * Filters the HTML content for the image tag. @@ -3603,37 +3611,24 @@ function wp_video_shortcode( $attr, $content = '' ) { $html_atts = array( 'class' => $atts['class'], 'id' => sprintf( 'video-%d-%d', $post_id, $instance ), - 'width' => absint( $atts['width'] ), - 'height' => absint( $atts['height'] ), - 'poster' => esc_url( $atts['poster'] ), + 'width' => (string) absint( $atts['width'] ), + 'height' => (string) absint( $atts['height'] ), + 'poster' => empty( $atts['poster'] ) ? null : $atts['poster'], 'loop' => wp_validate_boolean( $atts['loop'] ), 'autoplay' => wp_validate_boolean( $atts['autoplay'] ), 'muted' => wp_validate_boolean( $atts['muted'] ), - 'preload' => $atts['preload'], + 'preload' => empty( $atts['preload'] ) ? null : $attr['preload'], ); - // These ones should just be omitted altogether if they are blank. - foreach ( array( 'poster', 'loop', 'autoplay', 'preload', 'muted' ) as $a ) { - if ( empty( $html_atts[ $a ] ) ) { - unset( $html_atts[ $a ] ); - } - } - - $attr_strings = array(); - foreach ( $html_atts as $k => $v ) { - $attr_strings[] = $k . '="' . esc_attr( $v ) . '"'; - } - $html = ''; if ( 'mediaelement' === $library && 1 === $instance ) { $html .= "\n"; } - $html .= sprintf( '