????JFIF??x?x????'
Server IP : 104.21.30.238 / Your IP : 216.73.216.83 Web Server : LiteSpeed System : Linux premium151.web-hosting.com 4.18.0-553.44.1.lve.el8.x86_64 #1 SMP Thu Mar 13 14:29:12 UTC 2025 x86_64 User : tempvsty ( 647) PHP Version : 8.0.30 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : OFF | Pkexec : OFF Directory : /proc/thread-self/cwd/wp-content/plugins/wordpress-importer/php-toolkit/XML/ |
Upload File : |
<?php

namespace WordPress\XML;

use function WordPress\Encoding\codepoint_to_utf8_bytes;

/**
 * XML API: XMLDecoder class
 *
 * Decodes spans of raw text found inside XML content,
 * whether found in an attribute or in a text node.
 *
 * Do not use this class on the contents of a CDATA section,
 * as those sections are not encoded with the XML rules unless
 * they are embedded XML content.
 *
 * @package WordPress
 * @subpackage HTML-API
 * @since WP_VERSION
 */
class XMLDecoder {
	/**
	 * The five predefined XML entity references, keyed by the bytes that
	 * follow the opening `&` (terminating semicolon included).
	 */
	private const NAMED_REFERENCES = array(
		'amp;'  => '&',
		'apos;' => "'",
		'lt;'   => '<',
		'gt;'   => '>',
		'quot;' => '"',
	);

	/**
	 * Decodes a span of XML text.
	 *
	 * Replaces the five predefined named references (`&amp;`, `&apos;`,
	 * `&lt;`, `&gt;`, `&quot;`) and decimal / hexadecimal numeric character
	 * references with the characters they stand for. Anything that looks
	 * like a reference but is malformed is left in the output untouched.
	 *
	 * Example:
	 *
	 *     '&' = XMLDecoder::decode( '&amp;' );
	 *     '…' = XMLDecoder::decode( '&#x2026;' );
	 *     'AT&T' = XMLDecoder::decode( 'AT&T' ); // Malformed; passed through.
	 *
	 * @todo Decide whether malformed references should be reported as errors
	 *       instead of being passed through as plaintext.
	 *
	 * @since WP_VERSION
	 *
	 * @access private
	 *
	 * @param string $text Text document containing span of text to decode.
	 * @return string Decoded UTF-8 string.
	 */
	public static function decode( $text ) {
		$decoded = '';
		$end     = strlen( $text );
		$at      = 0; // Where the next search for `&` starts.
		$was_at  = 0; // End of the last span already copied into $decoded.

		while ( $at < $end ) {
			$ampersand_at = strpos( $text, '&', $at );
			if ( false === $ampersand_at ) {
				break;
			}

			$reference_at = $ampersand_at + 1;
			if ( $reference_at >= $end ) {
				// @todo This is an error. The document ended right after `&`; the rest is kept as plaintext.
				break;
			}

			/*
			 * Unless a complete reference is recognized below, this ampersand is
			 * kept as plaintext and scanning resumes immediately after it. Jumping
			 * past it (rather than advancing one byte from the previous search
			 * start) avoids finding the same ampersand over and over.
			 */
			$at = $reference_at;

			if ( '#' !== $text[ $reference_at ] ) {
				/*
				 * A named reference must be one of the five predefined ones
				 * and must fit entirely inside the text, semicolon included.
				 */
				foreach ( self::NAMED_REFERENCES as $name => $substitution ) {
					$name_length = strlen( $name );

					if (
						$reference_at + $name_length <= $end &&
						0 === substr_compare( $text, $name, $reference_at, $name_length )
					) {
						$decoded .= substr( $text, $was_at, $ampersand_at - $was_at ) . $substitution;
						$at       = $reference_at + $name_length;
						$was_at   = $at;
						continue 2;
					}
				}

				// @todo This is an error. This ampersand _must_ be encoded. Treat as plaintext and move on.
				continue;
			}

			/*
			 * The shortest numeric character reference is four bytes long,
			 * e.g. `&#9;`, so its semicolon sits at `$ampersand_at + 3` at the
			 * earliest. This also guarantees the byte after `#` exists.
			 */
			if ( $ampersand_at + 3 >= $end ) {
				// @todo This is an error. Treat as plaintext and move on.
				continue;
			}

			if ( 'x' === $text[ $reference_at + 1 ] ) {
				$zeros_at    = $reference_at + 2;
				$base        = 16;
				$digit_chars = '0123456789abcdefABCDEF';
				$max_digits  = 6; // Largest code point is `&#x10FFFF;`.
			} else {
				$zeros_at    = $reference_at + 1;
				$base        = 10;
				$digit_chars = '0123456789';
				$max_digits  = 7; // Largest code point is `&#1114111;`.
			}

			// Leading zeros are skipped so they do not count against the digit limit.
			$zero_count  = strspn( $text, '0', $zeros_at );
			$digits_at   = $zeros_at + $zero_count;
			$digit_count = strspn( $text, $digit_chars, $digits_at, $max_digits );
			$semi_at     = $digits_at + $digit_count;

			if ( 0 === $digit_count || $semi_at >= $end || ';' !== $text[ $semi_at ] ) {
				// @todo This is an error. Treat as plaintext and move on.
				continue;
			}

			$codepoint           = intval( substr( $text, $digits_at, $digit_count ), $base );
			$character_reference = codepoint_to_utf8_bytes( $codepoint );

			/*
			 * A U+FFFD replacement character coming back for any code point other
			 * than U+FFFD itself means the reference named a code point that could
			 * not be encoded.
			 *
			 * See https://www.w3.org/TR/xml/#charencoding
			 */
			if ( '�' === $character_reference && 0xFFFD !== $codepoint ) {
				// @todo This is an error. Treat as plaintext and continue, which is wrong.
				continue;
			}

			// Copy the plaintext up to the ampersand, then the decoded character.
			$decoded .= substr( $text, $was_at, $ampersand_at - $was_at ) . $character_reference;
			$at       = $semi_at + 1;
			$was_at   = $at;
		}

		// Nothing was decoded: hand back the input unchanged.
		if ( 0 === $was_at ) {
			return $text;
		}

		// Append whatever plaintext follows the last decoded reference.
		if ( $was_at < $end ) {
			$decoded .= substr( $text, $was_at, $end - $was_at );
		}

		return $decoded;
	}

	/**
	 * Finds and parses the next entity in a given text starting after the
	 * given byte offset, and being entirely found within the given max length.
	 *
	 * NOTE: This method is not implemented yet. It scans the requested range
	 * but always returns `null` and never assigns `$entity_at`.
	 *
	 * @since {WP_VERSION}
	 *
	 * @todo Implement this function: move the decoding logic from `decode()`
	 *       into here, then call this function in a loop from `decode()`.
	 *
	 * @param string   $text                 Text in which to search for an XML entity.
	 * @param int      $starting_byte_offset Start looking after this byte offset.
	 * @param int      $ending_byte_offset   Stop looking if entity is not fully contained before this byte offset.
	 * @param int|null $entity_at            Optional. If provided, will be set to byte offset where entity was
	 *                                       found, if found. Otherwise, will not be set.
	 *
	 * @return string|null Parsed entity, if parsed, otherwise `null`.
	 */
	public static function next_entity( string $text, int $starting_byte_offset, int $ending_byte_offset, ?int &$entity_at = null ): ?string {
		$at  = $starting_byte_offset;
		$end = $ending_byte_offset;

		while ( $at < $end ) {
			$remaining = $end - $at;
			$amp_after = strcspn( $text, '&', $at, $remaining );

			// There are no more possible entities.
			if ( $amp_after === $remaining ) {
				return null;
			}

			// Placeholder: entity parsing is not implemented, so just advance.
			++$at;
		}

		return null;
	}
}