diff --git a/Build/phpstan/phpstan-baseline.neon b/Build/phpstan/phpstan-baseline.neon index 2241f6af217bf9c5ccd465242dce7af467bb42ca..76375907c52ceef229589a819baac5d345df36a5 100644 --- a/Build/phpstan/phpstan-baseline.neon +++ b/Build/phpstan/phpstan-baseline.neon @@ -2270,6 +2270,11 @@ parameters: count: 2 path: ../../typo3/sysext/core/Tests/Unit/Resource/ResourceStorageTest.php + - + message: "#^Unreachable statement \\- code above always terminates\\.$#" + count: 1 + path: ../../typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlParserTest.php + - message: "#^Constructor of class TYPO3\\\\CMS\\\\Core\\\\Tests\\\\Unit\\\\Tree\\\\TableConfiguration\\\\Fixtures\\\\TreeDataProviderFixture has an unused parameter \\$configuration\\.$#" count: 1 diff --git a/typo3/sysext/core/Classes/Serializer/Exception/InvalidDataException.php b/typo3/sysext/core/Classes/Serializer/Exception/InvalidDataException.php new file mode 100644 index 0000000000000000000000000000000000000000..bced45ffd3e5cf618f5ae2060e65c8daf2eb182e --- /dev/null +++ b/typo3/sysext/core/Classes/Serializer/Exception/InvalidDataException.php @@ -0,0 +1,27 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! 
+ */ + +namespace TYPO3\CMS\Core\Serializer\Exception; + +use TYPO3\CMS\Core\Exception; + +/** + * An exception if something is wrong with the data to be encoded or decoded + */ +class InvalidDataException extends Exception +{ +} diff --git a/typo3/sysext/core/Classes/Serializer/Typo3XmlParser.php b/typo3/sysext/core/Classes/Serializer/Typo3XmlParser.php new file mode 100644 index 0000000000000000000000000000000000000000..c7e23af80a582bd6bb9803d58cb54f99a9326fc6 --- /dev/null +++ b/typo3/sysext/core/Classes/Serializer/Typo3XmlParser.php @@ -0,0 +1,253 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Serializer; + +use TYPO3\CMS\Core\Serializer\Exception\InvalidDataException; +use TYPO3\CMS\Core\Utility\MathUtility; + +/** + * Decodes XML string to PHP array. + * + * A dedicated set of node attributes is considered during conversion: + * - attribute "index" specifies the final node name which is used as key in the PHP array + * - attribute "type" specifies the node value type which is used for casting + * - attribute "base64" specifies the node value type being binary and requiring a + * base64-decoding + * These attributes were applied during encoding of the PHP array with XmlEncoder::encode(). + * + * The node name "n{number}" is converted to a number-indexed array key "{number}". + * + * @internal still experimental + */ +class Typo3XmlParser +{ + /** + * This method serves as a wrapper for decode() and is used to replace + * GeneralUtility::xml2array(), which returns an exception as a string instead of throwing it. 
+ * In perspective, all uses of this method should be replaced by decode() and the exceptions + * should be handled locally. + * + * @param string $xml XML string + * @param Typo3XmlSerializerOptions|null $options Decoding configuration - see decode() for details + * @return array|string PHP array - or a string if the XML root node is empty or an exception + */ + public function decodeWithReturningExceptionAsString( + string $xml, + Typo3XmlSerializerOptions $options = null + ) { + try { + return $this->decode($xml, $options); + } catch (\Throwable $e) { + return $e->getMessage(); + } + } + + /** + * @param string $xml XML string + * @param Typo3XmlSerializerOptions|null $options Apply specific decoding configuration - Ignored node types, libxml2 options, ... + * @return array|string PHP array - or a string if the XML root node is empty + * @throws InvalidDataException + */ + public function decode( + string $xml, + Typo3XmlSerializerOptions $options = null + ) { + $xml = trim($xml); + if ($xml === '') { + throw new InvalidDataException( + 'Invalid XML data, it can not be empty.', + 1630773210 + ); + } + + $options = $options ?? new Typo3XmlSerializerOptions(); + + if ($options->allowUndefinedNamespaces()) { + $xml = $this->disableNamespaceInNodeNames($xml); + } + + $internalErrors = libxml_use_internal_errors(true); + libxml_clear_errors(); + + $dom = new \DOMDocument(); + $dom->loadXML($xml, $options->getLoadOptions()); + + libxml_use_internal_errors($internalErrors); + + if ($error = libxml_get_last_error()) { + libxml_clear_errors(); + throw new InvalidDataException( + 'Line ' . $error->line . ': ' . 
xml_error_string($error->code), + 1630773230 + ); + } + + $rootNode = null; + foreach ($dom->childNodes as $child) { + if ($child->nodeType === \XML_DOCUMENT_TYPE_NODE) { + throw new InvalidDataException( + 'Document types are not allowed.', + 1630773261 + ); + } + if (in_array($child->nodeType, $options->getIgnoredNodeTypes(), true)) { + continue; + } + $rootNode = $child; + break; + } + if ($rootNode === null) { + throw new InvalidDataException( + 'Root node cannot be determined.', + 1630773276 + ); + } + + $rootNodeName = $rootNode->nodeName; + if ($options->allowUndefinedNamespaces()) { + $rootNodeName = $this->reactivateNamespaceInNodeName($rootNodeName); + } + if (!$rootNode->hasChildNodes()) { + if ($options->includeRootNode()) { + $result = [$rootNodeName => $rootNode->nodeValue]; + } else { + $result = $rootNode->nodeValue; + } + } else { + if ($options->includeRootNode()) { + $result = [$rootNodeName => $this->parseXml($rootNode, $options)]; + } else { + $result = $this->parseXml($rootNode, $options); + } + } + if ($options->returnRootNodeName() && is_array($result)) { + $result['_DOCUMENT_TAG'] = $rootNodeName; + } + + return $result; + } + + /** + * DOMDocument::loadXML() breaks if prefixes of undefined namespaces are used in node names: + * Replace namespace divider ":" by temporary "___" string before parsing the XML. + */ + protected function disableNamespaceInNodeNames(string $value): string + { + return preg_replace( + ['#<([/]?)([[:alnum:]_-]*):([[:alnum:]_-]*)([ >]?)#'], + ['<$1$2___$3$4'], + $value + ); + } + + /** + * Re-insert the namespace divider ":" into all node names again after parsing the XML. 
+ */ + protected function reactivateNamespaceInNodeNames(string $value): string + { + if (!str_contains($value, '___')) { + return $value; + } + + return preg_replace( + ['#<([/]?)([[:alnum:]_-]*)___([[:alnum:]_-]*)([ >]?)#'], + ['<$1$2:$3$4'], + $value + ); + } + + /** + * Re-insert the namespace divider ":" into single node name again after parsing the XML. + */ + protected function reactivateNamespaceInNodeName(string $value): string + { + return str_replace('___', ':', $value); + } + + /** + * @return array|string|null + */ + protected function parseXml(\DOMNode $node, Typo3XmlSerializerOptions $options) + { + if (!$node->hasChildNodes()) { + return $node->nodeValue; + } + + if ($node->childNodes->length === 1 + && in_array($node->firstChild->nodeType, [\XML_TEXT_NODE, \XML_CDATA_SECTION_NODE]) + ) { + $value = $node->firstChild->nodeValue; + if ($options->allowUndefinedNamespaces()) { + $value = $this->reactivateNamespaceInNodeNames($value); + } + return $value; + } + + $result = []; + foreach ($node->childNodes as $child) { + if (in_array($child->nodeType, $options->getIgnoredNodeTypes(), true)) { + continue; + } + + $value = $this->parseXml($child, $options); + + if ($child instanceof \DOMElement && $child->hasAttribute('index')) { + $key = $child->getAttribute('index'); + } else { + $key = $child->nodeName; + if ($options->allowUndefinedNamespaces()) { + $key = $this->reactivateNamespaceInNodeName($key); + } + if ($options->hasNamespacePrefix() + && str_starts_with($key, $options->getNamespacePrefix()) + ) { + $key = substr($key, strlen($options->getNamespacePrefix())); + } + if (str_starts_with($key, 'n') + && MathUtility::canBeInterpretedAsInteger($index = substr($key, 1)) + ) { + $key = (int)$index; + } + } + + if ($child instanceof \DOMElement && $child->hasAttribute('base64') && is_string($value)) { + $value = base64_decode($value); + } elseif ($child instanceof \DOMElement && $child->hasAttribute('type')) { + switch ($child->getAttribute('type')) { + 
case 'integer': + $value = (int)$value; + break; + case 'double': + $value = (float)$value; + break; + case 'boolean': + $value = (bool)$value; + break; + case 'NULL': + $value = null; + break; + case 'array': + $value = is_array($value) ? $value : (empty(trim($value)) ? [] : (array)$value); + break; + } + } + $result[$key] = $value; + } + return $result; + } +} diff --git a/typo3/sysext/core/Classes/Serializer/Typo3XmlParserOptions.php b/typo3/sysext/core/Classes/Serializer/Typo3XmlParserOptions.php new file mode 100644 index 0000000000000000000000000000000000000000..74f75c7e2211aea90ac882ed46aa6c72627927e8 --- /dev/null +++ b/typo3/sysext/core/Classes/Serializer/Typo3XmlParserOptions.php @@ -0,0 +1,74 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Serializer; + +/** + * @internal still experimental + */ +class Typo3XmlParserOptions +{ + public const FORMAT = 'format'; + public const FORMAT_INLINE = -1; + public const FORMAT_PRETTY_WITH_TAB = 0; + public const NAMESPACE_PREFIX = 'namespace_prefix'; + public const ROOT_NODE_NAME = 'root_node_name'; + + protected array $options = [ + // Format XML with + // - "-1" is inline XML + // - "0" is pretty XML with tabs + // - "1...x" is pretty XML with x spaces. + self::FORMAT => self::FORMAT_PRETTY_WITH_TAB, + // This XML namespace is prepended to each XML node, for example "T3:". + self::NAMESPACE_PREFIX => '', + // Wrap the XML with a root node of that name or set it to '' to skip wrapping. 
+ self::ROOT_NODE_NAME => 'phparray', + ]; + + public function __construct(array $options = []) + { + $this->options = array_merge($this->options, $options); + } + + public function getRootNodeName(): string + { + return $this->options[self::ROOT_NODE_NAME]; + } + + public function getNewlineChar(): string + { + return $this->options[self::FORMAT] === self::FORMAT_INLINE ? '' : LF; + } + + public function getIndentationStep(): string + { + switch ($this->options[self::FORMAT]) { + case self::FORMAT_INLINE: + return ''; + case self::FORMAT_PRETTY_WITH_TAB: + return "\t"; + default: + return str_repeat(' ', max(0, $this->options[self::FORMAT])); + } + } + + public function getNamespacePrefix(): string + { + return $this->options[self::NAMESPACE_PREFIX]; + } +} diff --git a/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializer.php b/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializer.php new file mode 100644 index 0000000000000000000000000000000000000000..0a4477652ca78b87a103a0eb32afa93d47cb4b5f --- /dev/null +++ b/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializer.php @@ -0,0 +1,340 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Serializer; + +use TYPO3\CMS\Core\Utility\MathUtility; + +/** + * Encodes PHP array to XML string. + * + * A dedicated set of entry properties is stored in XML during conversion: + * - XML node attribute "index" stores original entry key if XML node name differs from entry + * key + * - XML node attribute "type" stores entry value type ("bool", "int", "double", ...) 
+ * - XML node attribute "base64" specifies if entry value is binary (for example an image) + * These attributes are interpreted during decoding of the XML string with Typo3XmlParser::decode(). + * + * Specific encoding configuration can be set by $additionalOptions - for the full array or array paths. + * For example + * ```php + * $input = [ + * 'numeric' => [ + * 'value1', + * 'value2' + * ], + * 'numeric-n-index' => [ + * 'value1', + * 'value2' + * ], + * 'nested' => [ + * 'node1' => 'value1', + * 'node2' => [ + * 'node' => 'value' + * ] + * ] + * ]; + * $additionalOptions = [ + * 'useIndexTagForNum' => 'numbered-index', + * 'alt_options' => [ + * '/numeric-n-index' => [ + * 'useNindex' => true + * ], + * '/nested' => [ + * 'useIndexTagForAssoc' => 'nested-outer', + * 'clearStackPath' => true, + * 'alt_options' => [ + * '/nested-outer' => [ + * 'useIndexTagForAssoc' => 'nested-inner' + * ] + * ] + * ] + * ] + * ]; + * ``` + * => + * ```xml + * <phparray> + * <numeric type="array"> + * <numbered-index index="0">value1</numbered-index> + * <numbered-index index="1">value2</numbered-index> + * </numeric> + * <numeric-n-index type="array"> + * <n0>value1</n0> + * <n1>value2</n1> + * </numeric-n-index> + * <nested type="array"> + * <nested-outer index="node1">value1</nested-outer> + * <nested-outer index="node2" type="array"> + * <nested-inner index="node">value</nested-inner> + * </nested-outer> + * </nested> + * </phparray> + * ``` + * Available options are: + * - grandParentTagMap[grandParentTagName/parentTagName] [string] + * Convert array key X to XML node name "{grandParentTagMap}" with node attribute "index=X" + * - if grand-parent is "{grandParentTagName}" and parent node is "{parentTagName}". + * - parentTagMap[parentTagName:_IS_NUM] [string] + * Convert array key X to XML node name "{parentTagMap}" with node attribute "index=X" + * - if parent node is "{parentTagName}" and current node is number-indexed. 
+ * - parentTagMap[parentTagName:nodeName] [string] + * Convert array key X to XML node name "{parentTagMap}" with node attribute "index=X" + * - if parent node is "{parentTagName}" and current node is "{nodeName}". + * - parentTagMap[parentTagName] [string] + * Convert array key X to XML node name "{parentTagMap}" with node attribute "index=X" + * - if parent node is "{parentTagName}". + * - useNindex [bool] + * Convert number-indexed array key X to XML node name "nX". + * - useIndexTagForNum [string] + * Convert number-indexed array key X to XML node name "{useIndexTagForNum}" with node + * attribute "index=X". + * - useIndexTagForAssoc [string] + * Convert associative array key X to XML node name "{useIndexTagForAssoc}" with node + * attribute "index=X". + * - disableTypeAttrib [bool|int] + * Disable node attribute "type" for all value types + * (true = disable for all except arrays, 2 = disable for all). + * - useCDATA [bool] + * Wrap node value with <![CDATA[{node value}]]> - if text contains special characters. + * - alt_options[/.../nodeName] [array] + * Set new options for specific array path. + * - clearStackPath [bool] + * Resetting internal counter when descending the array hierarchy: Allows using relative + * array path in nested "alt_options" instead of absolute path. + * + * @internal still experimental + */ +class Typo3XmlSerializer +{ + /** + * This method serves as a wrapper for encode() and is used to replace + * GeneralUtility::array2xml(), which returns an exception as a string instead of throwing it. + * In perspective, all uses of this method should be replaced by encode() and the exceptions + * should be handled locally. 
+ * + * @param array $input PHP array + * @param Typo3XmlParserOptions|null $options Encoding configuration - see encode() for details + * @param array $additionalOptions Encoding options - see encode() for details + * @return string XML or exception + */ + public function encodeWithReturningExceptionAsString( + array $input, + Typo3XmlParserOptions $options = null, + array $additionalOptions = [] + ): string { + try { + return $this->encode($input, $options, $additionalOptions); + } catch (\Throwable $e) { + return $e->getMessage(); + } + } + + /** + * @param array $input PHP array + * @param Typo3XmlParserOptions|null $options Apply specific encoding configuration - XML format, namespace prefix and root node name + * @param array $additionalOptions Apply specific encoding options - for the full array or specific array paths. + * @return string XML string + */ + public function encode( + array $input, + Typo3XmlParserOptions $options = null, + array $additionalOptions = [] + ): string { + $options = $options ?? new Typo3XmlParserOptions(); + return $this->parseArray( + $input, + $options, + $additionalOptions + ); + } + + protected function parseArray( + array $input, + Typo3XmlParserOptions $options, + array $additionalOptions, + int $level = 0, + array $stackData = [] + ): string { + $xml = ''; + + $rootNodeName = $options->getRootNodeName(); + if (empty($rootNodeName)) { + $indentation = str_repeat($options->getIndentationStep(), $level); + } else { + $indentation = str_repeat($options->getIndentationStep(), $level + 1); + } + + foreach ($input as $key => $value) { + // Construct the node name + attributes + $nodeName = $key = (string)$key; + $nodeAttributes = ''; + if (isset( + $stackData['grandParentTagName'], + $stackData['parentTagName'], + $additionalOptions['grandParentTagMap'][$stackData['grandParentTagName'] . '/' . $stackData['parentTagName']] + )) { + // ... 
based on grand-parent + parent node name + $nodeName = (string)$additionalOptions['grandParentTagMap'][$stackData['grandParentTagName'] . '/' . $stackData['parentTagName']]; + $nodeAttributes = ' index="' . htmlspecialchars($key) . '"'; + } elseif (isset( + $stackData['parentTagName'], + $additionalOptions['parentTagMap'][$stackData['parentTagName'] . ':_IS_NUM'] + ) && MathUtility::canBeInterpretedAsInteger($nodeName) + ) { + // ... based on parent node name + if current node name is numeric + $nodeName = (string)$additionalOptions['parentTagMap'][$stackData['parentTagName'] . ':_IS_NUM']; + $nodeAttributes = ' index="' . htmlspecialchars($key) . '"'; + } elseif (isset( + $stackData['parentTagName'], + $additionalOptions['parentTagMap'][$stackData['parentTagName'] . ':' . $nodeName] + )) { + // ... based on parent node name + current node name + $nodeName = (string)$additionalOptions['parentTagMap'][$stackData['parentTagName'] . ':' . $nodeName]; + $nodeAttributes = ' index="' . htmlspecialchars($key) . '"'; + } elseif (isset( + $stackData['parentTagName'], + $additionalOptions['parentTagMap'][$stackData['parentTagName']] + )) { + // ... based on parent node name + $nodeName = (string)$additionalOptions['parentTagMap'][$stackData['parentTagName']]; + $nodeAttributes = ' index="' . htmlspecialchars($key) . '"'; + } elseif (MathUtility::canBeInterpretedAsInteger($nodeName)) { + // ... if current node name is numeric + if ($additionalOptions['useNindex'] ?? false) { + $nodeName = 'n' . $nodeName; + } else { + $nodeName = ($additionalOptions['useIndexTagForNum'] ?? false) ?: 'numIndex'; + $nodeAttributes = ' index="' . $key . '"'; + } + } elseif (!empty($additionalOptions['useIndexTagForAssoc'])) { + // ... if current node name is string + $nodeName = $additionalOptions['useIndexTagForAssoc']; + $nodeAttributes = ' index="' . htmlspecialchars($key) . 
'"'; + } + $nodeName = $this->cleanUpNodeName($nodeName); + + // Construct the node value + if (is_array($value)) { + // ... if has sub elements + if (isset($additionalOptions['alt_options']) + && ($additionalOptions['alt_options'][($stackData['path'] ?? '') . '/' . $nodeName] ?? false) + ) { + $subOptions = $additionalOptions['alt_options'][($stackData['path'] ?? '') . '/' . $nodeName]; + $clearStackPath = (bool)($subOptions['clearStackPath'] ?? false); + } else { + $subOptions = $additionalOptions; + $clearStackPath = false; + } + if (empty($value)) { + $nodeValue = ''; + } else { + $nodeValue = $options->getNewlineChar(); + $nodeValue .= $this->parseArray( + $value, + $options, + $subOptions, + $level + 1, + [ + 'parentTagName' => $nodeName, + 'grandParentTagName' => $stackData['parentTagName'] ?? '', + 'path' => $clearStackPath ? '' : ($stackData['path'] ?? '') . '/' . $nodeName, + ] + ); + $nodeValue .= $indentation; + } + // Dropping the "type=array" attribute makes the XML prettier, but means that empty + // arrays are not restored with XmlDecoder::decode(). + if (($additionalOptions['disableTypeAttrib'] ?? false) !== 2) { + $nodeAttributes .= ' type="array"'; + } + } else { + // ... if is simple value + if ($this->isBinaryValue($value)) { + $nodeValue = $options->getNewlineChar() . chunk_split(base64_encode($value)); + $nodeAttributes .= ' base64="1"'; + } else { + $type = gettype($value); + if ($type === 'string') { + $nodeValue = htmlspecialchars($value); + if (($additionalOptions['useCDATA'] ?? false) && $nodeValue !== $value) { + $nodeValue = '<![CDATA[' . $value . ']]>'; + } + } else { + $nodeValue = $value; + if (($additionalOptions['disableTypeAttrib'] ?? false) === false) { + $nodeAttributes .= ' type="' . $type . '"'; + } + } + } + } + + // Construct the node + if ($nodeName !== '') { + $xml .= $indentation; + $xml .= '<' . $options->getNamespacePrefix() . $nodeName . $nodeAttributes . '>'; + $xml .= $nodeValue; + $xml .= '</' . 
$options->getNamespacePrefix() . $nodeName . '>'; + $xml .= $options->getNewlineChar(); + } + } + + // Wrap with the root node if it is on the outermost level. + if ($level === 0 && !empty($rootNodeName)) { + $xml = '<' . $rootNodeName . '>' . $options->getNewlineChar() . $xml . '</' . $rootNodeName . '>'; + } + + return $xml; + } + + /** + * The node name is cleaned so that it contains only alphanumeric characters (plus - and _) and + * is no longer than 100 characters. + * + * @param string $nodeName + * @return string Cleaned node name + */ + protected function cleanUpNodeName(string $nodeName): string + { + return substr((string)preg_replace('/[^[:alnum:]_-]/', '', $nodeName), 0, 100); + } + + /** + * Is $value the content of a binary file, for example an image? If so, this value must be + * stored in a binary-safe manner so that it can be decoded correctly later. + * + * @param mixed $value + * @return bool + */ + protected function isBinaryValue($value): bool + { + if (!is_string($value)) { + return false; + } + + $binaryChars = "\0" . chr(1) . chr(2) . chr(3) . chr(4) . chr(5) + . chr(6) . chr(7) . chr(8) . chr(11) . chr(12) + . chr(14) . chr(15) . chr(16) . chr(17) . chr(18) + . chr(19) . chr(20) . chr(21) . chr(22) . chr(23) + . chr(24) . chr(25) . chr(26) . chr(27) . chr(28) + . chr(29) . chr(30) . chr(31); + + $length = strlen($value); + + return $length && strcspn($value, $binaryChars) !== $length; + } +} diff --git a/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializerOptions.php b/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializerOptions.php new file mode 100644 index 0000000000000000000000000000000000000000..f02a41845d14dd91389d9c0581bb711dc9da4fc6 --- /dev/null +++ b/typo3/sysext/core/Classes/Serializer/Typo3XmlSerializerOptions.php @@ -0,0 +1,79 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. 
+ * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Serializer; + +/** + * @internal still experimental + */ +class Typo3XmlSerializerOptions +{ + public const INCLUDE_ROOT_NODE = 'include_root_node'; + public const IGNORED_NODE_TYPES = 'ignored_node_types'; + public const LOAD_OPTIONS = 'load_options'; + public const NAMESPACE_PREFIX = 'namespace_prefix'; + public const ALLOW_UNDEFINED_NAMESPACES = 'allow_undefined_namespaces'; + public const RETURN_ROOT_NODE_NAME = 'return_root_node_name'; + + protected array $options = [ + // Ignore XML node types when converting to a PHP array. + self::IGNORED_NODE_TYPES => [\XML_PI_NODE, \XML_COMMENT_NODE], + // Use the XML root node or its children as the first level of the PHP array. + self::INCLUDE_ROOT_NODE => false, + // Apply these libxml2 options when loading the XML. + self::LOAD_OPTIONS => \LIBXML_NONET | \LIBXML_NOBLANKS, + // Remove this XML namespace from each XML node, for example "T3:". + self::NAMESPACE_PREFIX => '', + // Gracefully handle missing namespace declarations, for example <T3:T3FlexForms> without xmlns attribute. + self::ALLOW_UNDEFINED_NAMESPACES => false, + // Append the name of the XML root node to the PHP array key "_DOCUMENT_TAG". 
+ self::RETURN_ROOT_NODE_NAME => false, + ]; + + public function __construct(array $options = []) + { + $this->options = array_merge($this->options, $options); + } + public function getLoadOptions(): int + { + return $this->options[self::LOAD_OPTIONS]; + } + public function getIgnoredNodeTypes(): array + { + return $this->options[self::IGNORED_NODE_TYPES]; + } + public function includeRootNode(): bool + { + return $this->options[self::INCLUDE_ROOT_NODE]; + } + public function hasNamespacePrefix(): bool + { + return $this->options[self::NAMESPACE_PREFIX] !== ''; + } + public function getNamespacePrefix(): string + { + return $this->options[self::NAMESPACE_PREFIX]; + } + public function allowUndefinedNamespaces(): bool + { + return $this->options[self::ALLOW_UNDEFINED_NAMESPACES]; + } + public function returnRootNodeName(): bool + { + return $this->options[self::RETURN_ROOT_NODE_NAME]; + } +} diff --git a/typo3/sysext/core/Tests/Unit/Serializer/Fixtures/file.gif b/typo3/sysext/core/Tests/Unit/Serializer/Fixtures/file.gif new file mode 100644 index 0000000000000000000000000000000000000000..e4eec924aa745f3aec48d0fd59a0238f6536f3f8 Binary files /dev/null and b/typo3/sysext/core/Tests/Unit/Serializer/Fixtures/file.gif differ diff --git a/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlParserTest.php b/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlParserTest.php new file mode 100644 index 0000000000000000000000000000000000000000..b227c03916e48de7234895aaa23c6c076612a537 --- /dev/null +++ b/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlParserTest.php @@ -0,0 +1,659 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. 
+ * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Tests\Unit\Serializer; + +use TYPO3\CMS\Core\Exception; +use TYPO3\CMS\Core\Serializer\Typo3XmlParser; +use TYPO3\CMS\Core\Serializer\Typo3XmlParserOptions; +use TYPO3\CMS\Core\Serializer\Typo3XmlSerializer; +use TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions; +use TYPO3\TestingFramework\Core\Unit\UnitTestCase; + +class Typo3XmlParserTest extends UnitTestCase +{ + public static function decodeReturnsStringOrArrayDataProvider(): array + { + return [ + 'EmptyRootNode' => [ + '<phparray></phparray>', + '', + ], + 'RootNodeContainsText' => [ + '<phparray>content</phparray>', + 'content', + ], + 'RootNodeContainsSubNode' => [ + '<phparray><node>content</node></phparray>', + ['node' => 'content'], + ], + ]; + } + + /** + * @test + * @dataProvider decodeReturnsStringOrArrayDataProvider + */ + public function decodeReturnsStringOrArray(string $data, $expected): void + { + $xmlDecoder = new Typo3XmlParser(); + $result = $xmlDecoder->decode($data); + self::assertEquals($expected, $result); + } + + public static function decodeHandlesCommentsDataProvider(): array + { + return [ + 'IgnoreComments' => [ + [], + ['node' => 'content'], + ], + 'IgnoreCommentsToo' => [ + [Typo3XmlSerializerOptions::IGNORED_NODE_TYPES => [\XML_COMMENT_NODE]], + ['node' => 'content'], + ], + 'DoNotIgnoreComments' => [ + [Typo3XmlSerializerOptions::IGNORED_NODE_TYPES => []], + ['node' => 'content', '#comment' => ' Comment '], + ], + ]; + } + + /** + * @test + * @dataProvider decodeHandlesCommentsDataProvider + */ + public function decodeHandlesComments(array $config, array $expected): void + { + $xmlDecoder = new Typo3XmlParser(); + $result = $xmlDecoder->decode('<phparray attribute="ignored"> + <!-- Comment --> + <node>content</node> +</phparray>', new 
Typo3XmlSerializerOptions($config)); + self::assertEquals($expected, $result); + } + + /** + * @test + */ + public function decodeIgnoresNodeAttributes(): void + { + $xmlDecoder = new Typo3XmlParser(); + $result = $xmlDecoder->decode('<phparray attribute="ignored"> + <node attribute="ignored">content</node> +</phparray>'); + self::assertEquals(['node' => 'content'], $result); + } + + /** + * @return string[][] + */ + public static function decodeHandlesWhitespacesDataProvider(): array + { + $headerVariants = [ + 'utf-8' => '<?xml version="1.0" encoding="utf-8" standalone="yes"?>', + 'UTF-8' => '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', + 'no-encoding' => '<?xml version="1.0" standalone="yes"?>', + 'iso-8859-1' => '<?xml version="1.0" encoding="iso-8859-1" standalone="yes"?>', + 'ISO-8859-1' => '<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>', + ]; + $data = []; + foreach ($headerVariants as $identifier => $headerVariant) { + $data += [ + 'inputWithoutWhitespaces-' . $identifier => [ + $headerVariant . '<T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3FlexForms>', + ], + 'inputWithPrecedingWhitespaces-' . $identifier => [ + CR . ' ' . $headerVariant . '<T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3FlexForms>', + ], + 'inputWithTrailingWhitespaces-' . $identifier => [ + $headerVariant . '<T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3FlexForms>' . CR . ' ', + ], + 'inputWithPrecedingAndTrailingWhitespaces-' . $identifier => [ + CR . ' ' . $headerVariant . '<T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3FlexForms>' . CR . 
' ', + ], + ]; + } + return $data; + } + + /** + * @test + * @dataProvider decodeHandlesWhitespacesDataProvider + * @param string $input + */ + public function decodeHandlesWhitespaces(string $input): void + { + $xmlDecoder = new Typo3XmlParser(); + $expected = [ + 'data' => [ + 'settings.persistenceIdentifier' => [ + 'vDEF' => 'egon', + ], + ], + ]; + self::assertSame($expected, $xmlDecoder->decode($input)); + } + + /** + * @return array<string, mixed> + */ + public static function decodeHandlesTagNamespacesDataProvider(): array + { + return [ + 'inputWithNameSpaceOnRootLevel' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3:T3FlexForms xmlns:T3="https://typo3.org/ns/T3"> + <data> + <field> + <value index="vDEF1">egon</value> + <value index="vDEF2"><![CDATA[egon<CDATA:tag>olsen]]></value> + </field> + </data> + </T3:T3FlexForms>', + [ + 'data' => [ + 'field' => [ + 'vDEF1' => 'egon', + 'vDEF2' => 'egon<CDATA:tag>olsen', + ], + ], + ], + ], + 'inputWithNameSpaceOnNonRootLevel' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms xmlns:T3="https://typo3.org/ns/T3"> + <data> + <T3:field> + <value index="vDEF1">egon</value> + <value index="vDEF2"><![CDATA[egon<CDATA:tag>olsen]]></value> + </T3:field> + </data> + </T3FlexForms>', + [ + 'data' => [ + 'field' => [ + 'vDEF1' => 'egon', + 'vDEF2' => 'egon<CDATA:tag>olsen', + ], + ], + ], + ], + 'inputWithNameSpaceOnRootAndNonRootLevel' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3:T3FlexForms xmlns:T3="https://typo3.org/ns/T3"> + <data> + <T3:field> + <value index="vDEF1">egon</value> + <value index="vDEF2"><![CDATA[egon<CDATA:tag>olsen]]></value> + </T3:field> + </data> + </T3:T3FlexForms>', + [ + 'data' => [ + 'field' => [ + 'vDEF1' => 'egon', + 'vDEF2' => 'egon<CDATA:tag>olsen', + ], + ], + ], + ], + 'inputWithUndefinedNamespace' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3:T3FlexForms> + <data> + <T3:field> + <value 
index="vDEF1">egon</value> + <value index="vDEF2"><![CDATA[egon<CDATA:tag>olsen]]></value> + </T3:field> + </data> + </T3:T3FlexForms>', + [ + 'data' => [ + 'field' => [ + 'vDEF1' => 'egon', + 'vDEF2' => 'egon<CDATA:tag>olsen', + ], + ], + ], + [Typo3XmlSerializerOptions::ALLOW_UNDEFINED_NAMESPACES => true], + ], + ]; + } + + /** + * @test + * @dataProvider decodeHandlesTagNamespacesDataProvider + */ + public function decodeHandlesTagNamespaces(string $input, array $expected, array $options = []): void + { + $xmlDecoder = new Typo3XmlParser(); + self::assertSame( + $expected, + $xmlDecoder->decode($input, new Typo3XmlSerializerOptions($options + [Typo3XmlSerializerOptions::NAMESPACE_PREFIX => 'T3:'])) + ); + } + + /** + * @return array[] + */ + public static function decodeReturnsRootNodeNameDataProvider(): array + { + return [ + 'input' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3FlexForms>', + 'T3FlexForms', + ], + 'input-with-root-namespace' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3:T3FlexForms xmlns:T3="https://typo3.org/ns/T3"> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </field> + </data> + </T3:T3FlexForms>', + 'T3:T3FlexForms', + ], + 'input-with-namespace' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms xmlns:T3="https://typo3.org/ns/T3"> + <data> + <T3:field index="settings.persistenceIdentifier"> + <value index="vDEF">egon</value> + </T3:field> + </data> + </T3FlexForms>', + 'T3FlexForms', + ], + ]; + } + + /** + * @test + * @dataProvider decodeReturnsRootNodeNameDataProvider + */ + public function decodeReturnsRootNodeName(string $input, string $rootNodeName): void + { + $xmlDecoder = new Typo3XmlParser(); + $expected = [ + 'data' => [ + 'settings.persistenceIdentifier' => [ + 'vDEF' => 
'egon', + ], + ], + '_DOCUMENT_TAG' => $rootNodeName, + ]; + self::assertSame( + $expected, + $xmlDecoder->decode($input, new Typo3XmlSerializerOptions([Typo3XmlSerializerOptions::RETURN_ROOT_NODE_NAME => true])) + ); + } + + /** + * @test + * @dataProvider decodeReturnsRootNodeNameDataProvider + */ + public function decodeCanIncludeRootNode(string $input, string $rootNodeName, array $options = []): void + { + $xmlDecoder = new Typo3XmlParser(); + $expected = [ + $rootNodeName => [ + 'data' => [ + 'settings.persistenceIdentifier' => [ + 'vDEF' => 'egon', + ], + ], + ], + ]; + self::assertSame( + $expected, + $xmlDecoder->decode($input, new Typo3XmlSerializerOptions($options + [Typo3XmlSerializerOptions::INCLUDE_ROOT_NODE => true])) + ); + } + + /** + * @return array[] + */ + public static function decodeHandlesBigXmlContentDataProvider(): array + { + return [ + '1mb' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">' . str_repeat('1', 1024 * 1024) . '</value> + </field> + </data> + </T3FlexForms>', + str_repeat('1', 1024 * 1024), + ], + '5mb' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">' . str_repeat('1', 5 * 1024 * 1024) . '</value> + </field> + </data> + </T3FlexForms>', + str_repeat('1', 5 * 1024 * 1024), + ], + '10mb' => [ + '<?xml version="1.0" encoding="utf-8" standalone="yes"?> + <T3FlexForms> + <data> + <field index="settings.persistenceIdentifier"> + <value index="vDEF">' . str_repeat('1', 10 * 1024 * 1024) . 
'</value> + </field> + </data> + </T3FlexForms>', + str_repeat('1', 10 * 1024 * 1024), + ], + ]; + } + + /** + * @test + * @dataProvider decodeHandlesBigXmlContentDataProvider + */ + public function decodeHandlesBigXmlContent(string $input, string $testValue): void + { + $xmlDecoder = new Typo3XmlParser(); + $expected = [ + 'data' => [ + 'settings.persistenceIdentifier' => [ + 'vDEF' => $testValue, + ], + ], + ]; + self::assertSame($expected, $xmlDecoder->decode($input)); + } + + /** + * @return array[] + */ + public static function decodeHandlesAttributeTypesDataProvider(): array + { + $prefix = '<?xml version="1.0" encoding="utf-8" standalone="yes"?><T3FlexForms><field index="index">'; + $suffix = '</field></T3FlexForms>'; + return [ + 'no-type string' => [ + $prefix . '<value index="vDEF">foo bar</value>' . $suffix, + 'foo bar', + ], + 'no-type string with blank line' => [ + $prefix . '<value index="vDEF">foo bar' . PHP_EOL . '</value>' . $suffix, + 'foo bar' . PHP_EOL, + ], + 'no-type integer' => [ + $prefix . '<value index="vDEF">123</value>' . $suffix, + '123', + ], + 'no-type double' => [ + $prefix . '<value index="vDEF">1.23</value>' . $suffix, + '1.23', + ], + 'integer integer' => [ + $prefix . '<value index="vDEF" type="integer">123</value>' . $suffix, + 123, + ], + 'integer double' => [ + $prefix . '<value index="vDEF" type="integer">1.23</value>' . $suffix, + 1, + ], + 'double integer' => [ + $prefix . '<value index="vDEF" type="double">123</value>' . $suffix, + 123.0, + ], + 'double double' => [ + $prefix . '<value index="vDEF" type="double">1.23</value>' . $suffix, + 1.23, + ], + 'boolean 0' => [ + $prefix . '<value index="vDEF" type="boolean">0</value>' . $suffix, + false, + ], + 'boolean 1' => [ + $prefix . '<value index="vDEF" type="boolean">1</value>' . $suffix, + true, + ], + 'boolean true' => [ + $prefix . '<value index="vDEF" type="boolean">true</value>' . $suffix, + true, + ], + 'boolean false' => [ + $prefix . 
'<value index="vDEF" type="boolean">false</value>' . $suffix, + true, // sic(!) + ], + 'NULL' => [ + $prefix . '<value index="vDEF" type="NULL"></value>' . $suffix, + null, + ], + 'NULL string' => [ + $prefix . '<value index="vDEF" type="NULL">foo bar</value>' . $suffix, + null, + ], + 'NULL integer' => [ + $prefix . '<value index="vDEF" type="NULL">123</value>' . $suffix, + null, + ], + 'NULL double' => [ + $prefix . '<value index="vDEF" type="NULL">1.23</value>' . $suffix, + null, + ], + 'array' => [ + $prefix . '<value index="vDEF" type="array"></value>' . $suffix, + [], + ], + 'array with blank line' => [ + $prefix . '<value index="vDEF" type="array">' . PHP_EOL . '</value>' . $suffix, + [], + ], + ]; + } + + /** + * @test + * @dataProvider decodeHandlesAttributeTypesDataProvider + */ + public function decodeHandlesAttributeTypes(string $input, $expected): void + { + $xmlDecoder = new Typo3XmlParser(); + $result = $xmlDecoder->decode($input); + self::assertSame($expected, $result['index']['vDEF']); + } + + /** + * @test + */ + public function decodeHandlesBase64Attribute(): void + { + $xmlDecoder = new Typo3XmlParser(); + $content = file_get_contents(__DIR__ . '/Fixtures/file.gif'); + $contentBase64Encoded = chunk_split(base64_encode($content)); + $input = '<?xml version="1.0" encoding="utf-8" standalone="yes"?> +<T3FlexForms> + <field index="index"> + <value index="image" base64="1"> +' . $contentBase64Encoded . 
'</value> + </field> +</T3FlexForms> + '; + $result = $xmlDecoder->decode($input); + self::assertSame($content, $result['index']['image']); + } + + public static function decodeThrowsExceptionOnXmlParsingErrorDataProvider(): array + { + return [ + 'emptyXml' => [ + '', + [], + 1630773210, + ], + 'invalidXml' => [ + '<node>content', + [], + 1630773230, + ], + 'invalidNodeDocumentType' => [ + '<!DOCTYPE dummy SYSTEM "dummy.dtd"><dummy/>', + [], + 1630773261, + ], + 'noValidRootNode' => [ + '<phparray></phparray>', + [Typo3XmlSerializerOptions::IGNORED_NODE_TYPES => [\XML_ELEMENT_NODE]], + 1630773276, + ], + ]; + } + + /** + * @test + * @dataProvider decodeThrowsExceptionOnXmlParsingErrorDataProvider + */ + public function decodeThrowsExceptionOnXmlParsingError( + string $data, + array $config, + int $expected + ): void { + $this->expectException(Exception::class); + $this->expectExceptionCode($expected); + $xmlDecoder = new Typo3XmlParser(); + $xmlDecoder->decode($data, new Typo3XmlSerializerOptions($config)); + } + + /** + * @test + */ + public function encodeDecodePingPongSucceeds(): void + { + $input = [ + 'types' => [ + 'string' => 'text', + 'string-with-special-character' => 'text & image', + 'int' => 3, + 'bool' => false, + 'double' => 4.2, + 'null' => null, + ], + 'binary' => file_get_contents(__DIR__ . 
'/Fixtures/file.gif'), + 'empty' => [], + 'associative' => [ + 'node1' => 'value1', + 'node2' => 'value2', + ], + 'numeric' => [ + 'value1', + 'value2', + ], + 'numeric-n-index' => [ + 'value1', + 'value2', + ], + 'nested' => [ + 'node1' => 'value1', + 'node2' => [ + 'node' => 'value', + ], + ], + ]; + $additionalOptions = [ + 'useIndexTagForNum' => 'numbered-index', + 'alt_options' => [ + '/types' => [ + 'useCDATA' => true, + ], + '/numeric-n-index' => [ + 'useNindex' => true, + ], + '/nested' => [ + 'useIndexTagForAssoc' => 'nested-outer', + 'clearStackPath' => true, + 'alt_options' => [ + '/nested-outer' => [ + 'useIndexTagForAssoc' => 'nested-inner', + ], + ], + ], + ], + ]; + $encodingOptions = [ + Typo3XmlParserOptions::NAMESPACE_PREFIX => 'T3:', + ]; + $decodingOptions = [ + Typo3XmlSerializerOptions::NAMESPACE_PREFIX => 'T3:', + Typo3XmlSerializerOptions::ALLOW_UNDEFINED_NAMESPACES => true, + ]; + $xmlEncoder = new Typo3XmlSerializer(); + $xmlDecoder = new Typo3XmlParser(); + $arrayEncoded = $xmlEncoder->encode($input, new Typo3XmlParserOptions($encodingOptions), $additionalOptions); + $arrayEncodedDecoded = $xmlDecoder->decode($arrayEncoded, new Typo3XmlSerializerOptions($decodingOptions)); + self::assertEquals($input, $arrayEncodedDecoded); + } + + /** + * @test + */ + public function encodeDecodePingPongFailsForEmptyArray(): void + { + self::markTestSkipped( + 'Currently an empty array results in a string "\n" if encoded and decoded. ' . + 'This is probably not intended and not required for backward compatibility.' 
+ ); + $result = []; + $xmlEncoder = new Typo3XmlSerializer(); + $xmlDecoder = new Typo3XmlParser(); + $arrayEncoded = $xmlEncoder->encode($result); + $arrayEncodedDecoded = $xmlDecoder->decode($arrayEncoded); + self::assertEquals($result, $arrayEncodedDecoded); + } +} diff --git a/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlSerializerTest.php b/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlSerializerTest.php new file mode 100644 index 0000000000000000000000000000000000000000..ade15427981c96a0c1ccb24069eded5393064e02 --- /dev/null +++ b/typo3/sysext/core/Tests/Unit/Serializer/Typo3XmlSerializerTest.php @@ -0,0 +1,522 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! 
+ */ + +namespace TYPO3\CMS\Core\Tests\Unit\Serializer; + +use TYPO3\CMS\Core\Serializer\Typo3XmlParserOptions; +use TYPO3\CMS\Core\Serializer\Typo3XmlSerializer; +use TYPO3\TestingFramework\Core\Unit\UnitTestCase; + +class Typo3XmlSerializerTest extends UnitTestCase +{ + /** + * @test + */ + public function encodeReturnsRootNodeIfArrayIsEmpty(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode([]); + self::assertEquals('<phparray> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanPreventWrappingByRootNode(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['node' => 'value'], + new Typo3XmlParserOptions([Typo3XmlParserOptions::ROOT_NODE_NAME => '']) + ); + self::assertEquals('<node>value</node> +', $xml); + } + + /** + * @test + */ + public function encodeSupportsInlineXml(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['node' => 'value'], + new Typo3XmlParserOptions([Typo3XmlParserOptions::FORMAT => Typo3XmlParserOptions::FORMAT_INLINE]) + ); + self::assertEquals('<phparray><node>value</node></phparray>', $xml); + } + + /** + * @test + */ + public function encodeSupportsPrettyPrintWithTabIndentation(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['node' => 'value'], + new Typo3XmlParserOptions([Typo3XmlParserOptions::FORMAT => Typo3XmlParserOptions::FORMAT_PRETTY_WITH_TAB]) + ); + self::assertEquals('<phparray> + <node>value</node> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeSupportsPrettyPrintWith4SpacesIndentation(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['node' => 'value'], + new Typo3XmlParserOptions([Typo3XmlParserOptions::FORMAT => 4]) + ); + self::assertEquals('<phparray> + <node>value</node> +</phparray>', $xml); + } + + /** + * @test + */ + public function 
encodeEnsuresAlphaNumericCharactersAndMinusAndUnderscoreInNodeName(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode(['node-åæøöäüßąćęłńóśźżà âçèéêëîïôœùûÿ!§$%&/()=?_' => 'value']); + self::assertEquals('<phparray> + <node-_>value</node-_> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanPrependNamespaceToNodeName(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['node' => 'value'], + new Typo3XmlParserOptions([Typo3XmlParserOptions::NAMESPACE_PREFIX => 'namespace:']) + ); + self::assertEquals('<phparray> + <namespace:node>value</namespace:node> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanPrependNToNodeNameIfNodeNameIsNumber(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + ['value'], + null, + ['useNindex' => true] + ); + self::assertEquals('<phparray> + <n0>value</n0> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanReplaceNodeNameAndAddAsIndexIfNodeNameIsNumber(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'value-1', + 'value-2', + ], + null, + [ + 'useIndexTagForNum' => 'node-of-normal-array', + ] + ); + self::assertEquals('<phparray> + <node-of-normal-array index="0">value-1</node-of-normal-array> + <node-of-normal-array index="1">value-2</node-of-normal-array> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanReplaceNodeNameAndAddAsIndexIfNodeNameIsString(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'node-1' => 'value-1', + 'node-2' => 'value-2', + ], + null, + [ + 'useIndexTagForAssoc' => 'node-of-associative-array', + ] + ); + self::assertEquals('<phparray> + <node-of-associative-array index="node-1">value-1</node-of-associative-array> + <node-of-associative-array index="node-2">value-2</node-of-associative-array> +</phparray>', $xml); + } + + /** + * 
@test + */ + public function encodeCanReplaceNodeNameAndAddAsIndexIfParentMatchesName(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'grandParent' => [ + 'parent' => [ + 'child' => [ + 'grandChild' => 'value', + ], + ], + ], + ], + null, + [ + 'parentTagMap' => [ + 'parent' => 'child-renamed', + ], + ] + ); + self::assertEquals('<phparray> + <grandParent type="array"> + <parent type="array"> + <child-renamed index="child" type="array"> + <grandChild>value</grandChild> + </child-renamed> + </parent> + </grandParent> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanReplaceNodeNameAndAddAsIndexIfParentAndNodeMatchNames(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'grandParent' => [ + 'parent' => [ + 'child-1' => [ + 'grandChild' => 'value', + ], + 'child-2' => 'value-2', + ], + ], + ], + null, + [ + 'parentTagMap' => [ + 'parent:child-1' => 'child-1-renamed', + ], + ] + ); + self::assertEquals('<phparray> + <grandParent type="array"> + <parent type="array"> + <child-1-renamed index="child-1" type="array"> + <grandChild>value</grandChild> + </child-1-renamed> + <child-2>value-2</child-2> + </parent> + </grandParent> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanReplaceNodeNameAndAddAsIndexIfParentMatchesNameAndNodeNameIsNumber(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'grandParent' => [ + 'parent' => [ + [ + 'grandChild' => 'value', + ], + ], + ], + ], + null, + [ + 'parentTagMap' => [ + 'parent:_IS_NUM' => 'child-renamed', + ], + ] + ); + self::assertEquals('<phparray> + <grandParent type="array"> + <parent type="array"> + <child-renamed index="0" type="array"> + <grandChild>value</grandChild> + </child-renamed> + </parent> + </grandParent> +</phparray>', $xml); + } + + /** + * @test + */ + public function 
encodeCanReplaceNodeNameAndAddAsIndexIfGrandParentAndParentAndNodeMatchNames(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'grandGrandParent' => [ + 'grandParent' => [ + 'parent' => [ + 'child' => [ + 'grandChild' => 'value', + ], + ], + ], + ], + ], + null, + [ + 'grandParentTagMap' => [ + 'grandParent/parent' => 'child-renamed', + ], + ] + ); + self::assertEquals('<phparray> + <grandGrandParent type="array"> + <grandParent type="array"> + <parent type="array"> + <child-renamed index="child" type="array"> + <grandChild>value</grandChild> + </child-renamed> + </parent> + </grandParent> + </grandGrandParent> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanWrapStringWithCDATAIfStringContainsSpecialCharacters(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'node-1' => 'value without special character', + 'node-2' => 'value with special character &', + ], + null, + [ + 'useCDATA' => true, + ] + ); + self::assertEquals('<phparray> + <node-1>value without special character</node-1> + <node-2><![CDATA[value with special character &]]></node-2> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeAddsTypeAttributeToNodeIfValueIsNotString(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode([ + 'string' => 'value', + 'integer' => 1, + 'double' => 0.2, + 'boolean' => true, + 'object' => null, + 'array' => [], + ]); + self::assertEquals('<phparray> + <string>value</string> + <integer type="integer">1</integer> + <double type="double">0.2</double> + <boolean type="boolean">1</boolean> + <object type="NULL"></object> + <array type="array"></array> +</phparray>', $xml); + } + + public static function encodeCanDisableAddingTypeAttributeToNodeExceptIfValueIsArrayDataProvider(): array + { + return [ + ['disableTypeAttrib' => true], + ['disableTypeAttrib' => 1], + ]; + } + + /** + * @test + * @dataProvider 
encodeCanDisableAddingTypeAttributeToNodeExceptIfValueIsArrayDataProvider + */ + public function encodeCanDisableAddingTypeAttributeToNodeExceptIfValueIsArray($disableTypeAttrib): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'string' => 'value', + 'integer' => 1, + 'double' => 0.2, + 'boolean' => true, + 'object' => null, + 'array' => [], + ], + null, + [ + 'disableTypeAttrib' => $disableTypeAttrib, + ] + ); + self::assertEquals('<phparray> + <string>value</string> + <integer>1</integer> + <double>0.2</double> + <boolean>1</boolean> + <object></object> + <array type="array"></array> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeCanDisableAddingTypeAttributeToNode(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'string' => 'value', + 'integer' => 1, + 'double' => 0.2, + 'boolean' => true, + 'object' => null, + 'array' => [], + ], + null, + [ + 'disableTypeAttrib' => 2, + ] + ); + self::assertEquals('<phparray> + <string>value</string> + <integer>1</integer> + <double>0.2</double> + <boolean>1</boolean> + <object></object> + <array></array> +</phparray>', $xml); + } + + /** + * @test + */ + public function encodeAddsBase64AttributeAndEncodesWithBase64IfValueIsBinaryData(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $content = file_get_contents(__DIR__ . 
'/Fixtures/file.gif'); + $contentBase64Encoded = chunk_split(base64_encode($content)); + $xml = $xmlEncoder->encode([ + 'binary' => $content, + ]); + self::assertEquals("<phparray> + <binary base64=\"1\"> +$contentBase64Encoded</binary> +</phparray>", $xml); + } + + /** + * @test + */ + public function encodeCanSetAlternativeOptionsPerNestingLevel(): void + { + $xmlEncoder = new Typo3XmlSerializer(); + $xml = $xmlEncoder->encode( + [ + 'grandParent1' => [ + 'parent1' => [ + 'value1', + ], + 'parent2' => [ + 'value2', + ], + ], + 'grandParent2' => [ + 'parent3' => [ + 'child3' => 'value3', + ], + 'parent4' => [ + 'child4' => 'value4', + ], + ], + ], + null, + [ + 'useNindex' => false, + 'useIndexTagForNum' => null, + 'useIndexTagForAssoc' => null, + 'alt_options' => [ + '/grandParent1/parent1' => [ + 'useIndexTagForNum' => 'numbered-index', + ], + '/grandParent1/parent2' => [ + 'useNindex' => true, + ], + '/grandParent2' => [ + 'clearStackPath' => true, + 'alt_options' => [ + '/parent4' => [ + 'useIndexTagForAssoc' => 'named-index', + ], + ], + ], + ], + ] + ); + self::assertEquals('<phparray> + <grandParent1 type="array"> + <parent1 type="array"> + <numbered-index index="0">value1</numbered-index> + </parent1> + <parent2 type="array"> + <n0>value2</n0> + </parent2> + </grandParent1> + <grandParent2 type="array"> + <parent3 type="array"> + <child3>value3</child3> + </parent3> + <parent4 type="array"> + <named-index index="child4">value4</named-index> + </parent4> + </grandParent2> +</phparray>', $xml); + } +} diff --git a/typo3/sysext/impexp/Classes/Export.php b/typo3/sysext/impexp/Classes/Export.php index 0efefa6df42e2e3a10554a68497684068ce20770..272190ba66e75370f733f8af7a0c7cb57265a659 100644 --- a/typo3/sysext/impexp/Classes/Export.php +++ b/typo3/sysext/impexp/Classes/Export.php @@ -33,6 +33,8 @@ use TYPO3\CMS\Core\Resource\Exception\InsufficientFolderWritePermissionsExceptio use TYPO3\CMS\Core\Resource\File; use TYPO3\CMS\Core\Resource\Folder; use 
TYPO3\CMS\Core\Resource\ResourceFactory; +use TYPO3\CMS\Core\Serializer\Typo3XmlParserOptions; +use TYPO3\CMS\Core\Serializer\Typo3XmlSerializer; use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Core\Utility\PathUtility; use TYPO3\CMS\Impexp\View\ExportPageTreeView; @@ -1276,7 +1278,11 @@ class Export extends ImportExport // Creating XML file from $outputArray: $charset = $this->dat['header']['charset'] ?: 'utf-8'; $XML = '<?xml version="1.0" encoding="' . $charset . '" standalone="yes" ?>' . LF; - $XML .= GeneralUtility::array2xml($this->dat, '', 0, 'T3RecordDocument', 0, $options); + $XML .= (new Typo3XmlSerializer())->encodeWithReturningExceptionAsString( + $this->dat, + new Typo3XmlParserOptions([Typo3XmlParserOptions::ROOT_NODE_NAME => 'T3RecordDocument']), + $options + ); return $XML; } diff --git a/typo3/sysext/impexp/Classes/Import.php b/typo3/sysext/impexp/Classes/Import.php index dc4909623da5da7acae3e3fdf187c5ad7fd5bece..f484a9e2a8f87a987ccfe655623bf6cf91400826 100644 --- a/typo3/sysext/impexp/Classes/Import.php +++ b/typo3/sysext/impexp/Classes/Import.php @@ -29,6 +29,8 @@ use TYPO3\CMS\Core\Resource\File; use TYPO3\CMS\Core\Resource\ResourceFactory; use TYPO3\CMS\Core\Resource\ResourceStorage; use TYPO3\CMS\Core\Resource\Security\FileNameValidator; +use TYPO3\CMS\Core\Serializer\Typo3XmlParser; +use TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions; use TYPO3\CMS\Core\Service\FlexFormService; use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; use TYPO3\CMS\Core\Utility\GeneralUtility; @@ -173,15 +175,24 @@ class Import extends ImportExport if ($fileExtension === 'xml') { $xmlContent = (string)file_get_contents($filePath); if (strlen($xmlContent)) { - $this->dat = GeneralUtility::xml2array($xmlContent, '', true); - if (is_array($this->dat)) { + try { + $dat = (new Typo3XmlParser())->decode( + $xmlContent, + new Typo3XmlSerializerOptions([ + Typo3XmlSerializerOptions::RETURN_ROOT_NODE_NAME => true, + Typo3XmlSerializerOptions::LOAD_OPTIONS 
=> \LIBXML_NONET | \LIBXML_NOBLANKS | \LIBXML_PARSEHUGE, + // @todo check if needed for imports/throw deprecation for invalid xml + Typo3XmlSerializerOptions::ALLOW_UNDEFINED_NAMESPACES => true, + ]) + ); + $this->dat = is_array($dat) ? $dat : [$dat]; if ($this->dat['_DOCUMENT_TAG'] === 'T3RecordDocument' && is_array($this->dat['header'] ?? null) && is_array($this->dat['records'] ?? null)) { $this->loadInit(); } else { $this->addError('XML file did not contain proper XML for TYPO3 Import'); } - } else { - $this->addError('XML could not be parsed: ' . $this->dat); + } catch (\Throwable $e) { + $this->addError('XML could not be parsed: ' . $e->getMessage()); } } else { $this->addError('Error opening file: ' . $filePath); @@ -1353,16 +1364,23 @@ class Import extends ImportExport $actualRecord ); $dataStructure = $flexFormTools->parseDataStructureByIdentifier($dataStructureIdentifier); - $flexFormData = GeneralUtility::xml2array($this->dat['records'][$table . ':' . $uid]['data'][$field]); - $flexFormIterator = GeneralUtility::makeInstance(DataHandler::class); - $flexFormIterator->callBackObj = $this; - $flexFormData['data'] = $flexFormIterator->checkValue_flex_procInData( - $flexFormData['data'], - [], - $dataStructure, - [$relation], - 'remapRelationsOfFlexFormCallBack' + $flexFormData = (new Typo3XmlParser())->decodeWithReturningExceptionAsString( + (string)($this->dat['records'][$table . ':' . $uid]['data'][$field] ?? ''), + new Typo3XmlSerializerOptions([ + Typo3XmlSerializerOptions::ALLOW_UNDEFINED_NAMESPACES => true, + ]) ); + if (is_array($flexFormData['data'] ?? null)) { + $flexFormIterator = GeneralUtility::makeInstance(DataHandler::class); + $flexFormIterator->callBackObj = $this; + $flexFormData['data'] = $flexFormIterator->checkValue_flex_procInData( + $flexFormData['data'], + [], + $dataStructure, + [$relation], + 'remapRelationsOfFlexFormCallBack' + ); + } if (is_array($flexFormData['data'] ??
null)) { $updateData[$table][$actualUid][$field] = $flexFormData; } @@ -1465,16 +1483,23 @@ class Import extends ImportExport $actualRecord ); $dataStructure = $flexFormTools->parseDataStructureByIdentifier($dataStructureIdentifier); - $flexFormData = GeneralUtility::xml2array($actualRecord[$field]); - $flexFormIterator = GeneralUtility::makeInstance(DataHandler::class); - $flexFormIterator->callBackObj = $this; - $flexFormData['data'] = $flexFormIterator->checkValue_flex_procInData( - $flexFormData['data'], - [], - $dataStructure, - [$table, $uid, $field, $softrefsByField], - 'processSoftReferencesFlexFormCallBack' + $flexFormData = (new Typo3XmlParser())->decodeWithReturningExceptionAsString( + (string)($actualRecord[$field] ?? ''), + new Typo3XmlSerializerOptions([ + Typo3XmlSerializerOptions::ALLOW_UNDEFINED_NAMESPACES => true, + ]) ); + if (is_array($flexFormData['data'] ?? null)) { + $flexFormIterator = GeneralUtility::makeInstance(DataHandler::class); + $flexFormIterator->callBackObj = $this; + $flexFormData['data'] = $flexFormIterator->checkValue_flex_procInData( + $flexFormData['data'], + [], + $dataStructure, + [$table, $uid, $field, $softrefsByField], + 'processSoftReferencesFlexFormCallBack' + ); + } if (is_array($flexFormData['data'] ?? null)) { $updateData[$table][$actualUid][$field] = $flexFormData; }