diff --git a/typo3/sysext/core/Classes/Html/HtmlParser.php b/typo3/sysext/core/Classes/Html/HtmlParser.php index 3fdf78324a0fb5315970a0439ceba3a22ced91de..a5d049eaa5a00e6af72b1bf187b0707f9dddcc94 100644 --- a/typo3/sysext/core/Classes/Html/HtmlParser.php +++ b/typo3/sysext/core/Classes/Html/HtmlParser.php @@ -183,21 +183,20 @@ class HtmlParser /** * Returns an array with either tag or non-tag content of the result from ->splitIntoBlock()/->splitTags() + * Does not include the tags in the tag parts. * * @param array $parts Parts generated by ->splitIntoBlock() or >splitTags() * @param bool $tag_parts Whether to return the tag-parts (default,TRUE) or what was outside the tags. - * @param bool $include_tag Whether to include the tags in the tag-parts (most useful for input made by ->splitIntoBlock()) * @return array Tag-parts/Non-tag-parts depending on input argument settings * @see splitIntoBlock(), splitTags() + * @private Currently only used to extract the content of table cells inside RteHtmlParser */ - public function getAllParts($parts, $tag_parts = true, $include_tag = true) + public function getAllParts($parts, $tag_parts = true) { $newParts = array(); foreach ($parts as $k => $v) { if (($k + ($tag_parts ? 0 : 1)) % 2) { - if (!$include_tag) { - $v = $this->removeFirstAndLastTag($v); - } + $v = $this->removeFirstAndLastTag($v); $newParts[] = $v; } } @@ -332,66 +331,6 @@ class HtmlParser return array($value, $metaValue); } - /** - * Checks whether block/solo tags are found in the correct amounts in HTML content - * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>" - * Solo tags are tags which are required to have ONLY start tags (possibly with an XHTML ending like ".../>") - * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning. 
- * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR! - * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR! - * - * @param string $content HTML content to analyze - * @param string $blockTags Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p") - * @param string $soloTags Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input") - * @return array Analyse data. - */ - public function checkTagTypeCounts($content, $blockTags = 'a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags = 'br,hr,img,input,area') - { - $content = strtolower($content); - $analyzedOutput = array(); - // Counts appearances of start-tags - $analyzedOutput['counts'] = array(); - // Lists ERRORS - $analyzedOutput['errors'] = array(); - // Lists warnings. - $analyzedOutput['warnings'] = array(); - // Lists stats for block-tags - $analyzedOutput['blocks'] = array(); - // Lists stats for solo-tags - $analyzedOutput['solo'] = array(); - // Block tags, must have endings... - $blockTags = explode(',', $blockTags); - foreach ($blockTags as $tagName) { - $countBegin = count(preg_split(('/\\<' . preg_quote($tagName, '/') . '(\\s|\\>)/s'), $content)) - 1; - $countEnd = count(preg_split(('/\\<\\/' . preg_quote($tagName, '/') . 
'(\\s|\\>)/s'), $content)) - 1; - $analyzedOutput['blocks'][$tagName] = array($countBegin, $countEnd, $countBegin - $countEnd); - if ($countBegin) { - $analyzedOutput['counts'][$tagName] = $countBegin; - } - if ($countBegin - $countEnd) { - if ($countBegin - $countEnd > 0) { - $analyzedOutput['errors'][$tagName] = 'There were more start-tags (' . $countBegin . ') than end-tags (' . $countEnd . ') for the element "' . $tagName . '". There should be an equal amount!'; - } else { - $analyzedOutput['warnings'][$tagName] = 'There were more end-tags (' . $countEnd . ') than start-tags (' . $countBegin . ') for the element "' . $tagName . '". There should be an equal amount! However the problem is not fatal.'; - } - } - } - // Solo tags, must NOT have endings... - $soloTags = explode(',', $soloTags); - foreach ($soloTags as $tagName) { - $countBegin = count(preg_split(('/\\<' . preg_quote($tagName, '/') . '(\\s|\\>)/s'), $content)) - 1; - $countEnd = count(preg_split(('/\\<\\/' . preg_quote($tagName, '/') . '(\\s|\\>)/s'), $content)) - 1; - $analyzedOutput['solo'][$tagName] = array($countBegin, $countEnd); - if ($countBegin) { - $analyzedOutput['counts'][$tagName] = $countBegin; - } - if ($countEnd) { - $analyzedOutput['warnings'][$tagName] = 'There were end-tags found (' . $countEnd . ') for the element "' . $tagName . '". This was not expected (although XHTML technically allows it).'; - } - } - return $analyzedOutput; - } - /********************************* * * Clean HTML code @@ -869,45 +808,6 @@ class HtmlParser return $srcVal; } - /** - * Cleans up the input $value for fonttags. - * If keepFace,-Size and -Color is set then font-tags with an allowed property is kept. Else deleted. - * - * @param string HTML content with font-tags inside to clean up. 
- * @param bool If set, keep "face" attribute - * @param bool If set, keep "size" attribute - * @param bool If set, keep "color" attribute - * @return string Processed HTML content - */ - public function cleanFontTags($value, $keepFace = 0, $keepSize = 0, $keepColor = 0) - { - // ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set! - $fontSplit = $this->splitIntoBlock('font', $value); - foreach ($fontSplit as $k => $v) { - // Font - if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v)); - $newAttribs = array(); - if ($keepFace && $attribArray['face']) { - $newAttribs[] = 'face="' . $attribArray['face'] . '"'; - } - if ($keepSize && $attribArray['size']) { - $newAttribs[] = 'size="' . $attribArray['size'] . '"'; - } - if ($keepColor && $attribArray['color']) { - $newAttribs[] = 'color="' . $attribArray['color'] . '"'; - } - $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v), $keepFace, $keepSize, $keepColor); - if (!empty($newAttribs)) { - $fontSplit[$k] = '<font ' . implode(' ', $newAttribs) . '>' . $innerContent . '</font>'; - } else { - $fontSplit[$k] = $innerContent; - } - } - } - return implode('', $fontSplit); - } - /** * This is used to map certain tag-names into other names. * @@ -925,44 +825,6 @@ class HtmlParser return $value; } - /** - * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '<strong>' would be converted back to '<strong>' if found in $tagList - * - * @param string $content HTML content - * @param string $tagList Tag list, separated by comma. Lowercase! 
- * @return string Processed HTML content - */ - public function unprotectTags($content, $tagList = '') - { - $tagsArray = GeneralUtility::trimExplode(',', $tagList, true); - $contentParts = explode('<', $content); - // bypass the first - $contentPartsSliced = array_slice($contentParts, 1, null, true); - foreach ($contentPartsSliced as $k => $tok) { - $firstChar = $tok[0]; - if (trim($firstChar) !== '') { - $subparts = explode('>', $tok, 2); - $tagEnd = strlen($subparts[0]); - if (strlen($tok) != $tagEnd) { - $endTag = $firstChar == '/' ? 1 : 0; - $tagContent = substr($tok, $endTag, $tagEnd - $endTag); - $tagParts = preg_split('/\\s+/s', $tagContent, 2); - $tagName = strtolower($tagParts[0]); - if ((string)$tagList === '' || in_array($tagName, $tagsArray)) { - $contentParts[$k] = '<' . $subparts[0] . '>' . $subparts[1]; - } else { - $contentParts[$k] = '<' . $tok; - } - } else { - $contentParts[$k] = '<' . $tok; - } - } else { - $contentParts[$k] = '<' . $tok; - } - } - return implode('', $contentParts); - } - /** * Internal function for case shifting of a string or whole array * @@ -1017,39 +879,6 @@ class HtmlParser return implode(' ', $accu); } - /** - * Get tag attributes, the classic version (which had some limitations?) - * - * @param string $tag The tag - * @param bool $deHSC De-htmlspecialchar flag. - * @return array - * @access private - */ - public function get_tag_attributes_classic($tag, $deHSC = 0) - { - $attr = $this->get_tag_attributes($tag, $deHSC); - return is_array($attr[0]) ? $attr[0] : array(); - } - - /** - * Indents input content with $number instances of $indentChar - * - * @param string $content Content string, multiple lines. - * @param int $number Number of indents - * @param string $indentChar Indent character/string - * @return string Indented code (typ. 
HTML) - */ - public function indentLines($content, $number = 1, $indentChar = TAB) - { - $preTab = str_pad('', $number * strlen($indentChar), $indentChar); - $lines = explode(LF, str_replace(CR, '', $content)); - foreach ($lines as &$line) { - $line = $preTab . $line; - } - unset($line); - return implode(LF, $lines); - } - /** * Converts TSconfig into an array for the HTMLcleaner function. * diff --git a/typo3/sysext/core/Classes/Html/RteHtmlParser.php b/typo3/sysext/core/Classes/Html/RteHtmlParser.php index 2680257e6ceab39668347c42646d3fe0244b505f..f63c5fbf7ef31e8c616c5822d654ecb4490353dc 100644 --- a/typo3/sysext/core/Classes/Html/RteHtmlParser.php +++ b/typo3/sysext/core/Classes/Html/RteHtmlParser.php @@ -330,7 +330,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser // Image found, do processing: if ($k % 2) { // Get attributes - $attribArray = $this->get_tag_attributes_classic($v, 1); + list($attribArray) = $this->get_tag_attributes($v, true); // It's always an absolute URL coming from the RTE into the Database. 
$absoluteUrl = trim($attribArray['src']); // Make path absolute if it is relative and we have a site path which is not '/' @@ -479,7 +479,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser // Image found if ($k % 2) { // Get the attributes of the img tag - $attribArray = $this->get_tag_attributes_classic($v, 1); + list($attribArray) = $this->get_tag_attributes($v, true); $absoluteUrl = trim($attribArray['src']); // Transform the src attribute into an absolute url, if it not already if (strtolower(substr($absoluteUrl, 0, 4)) !== 'http') { @@ -521,7 +521,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser foreach ($blockSplit as $k => $v) { // Block if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1); + list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true); // If the url is local, remove url-prefix if ($siteURL && substr($attribArray['href'], 0, strlen($siteURL)) == $siteURL) { $attribArray['href'] = $this->relBackPath . 
substr($attribArray['href'], strlen($siteURL)); @@ -553,7 +553,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser foreach ($blockSplit as $k => $v) { // If an A-tag was found: if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1); + list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true); $info = $this->urlInfoForLinkTags($attribArray['href']); // Check options: $attribArray_copy = $attribArray; @@ -799,7 +799,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser foreach ($blockSplit as $k => $v) { // Block if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v)); + list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v)); if ($attribArray['specialtag']) { $theTag = rawurldecode($attribArray['specialtag']); $theTagName = $this->getFirstTagName($theTag); @@ -913,7 +913,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser case 'h6': if (!$css) { - $attribArray = $this->get_tag_attributes_classic($tag); + list($attribArray) = $this->get_tag_attributes($tag); // Processing inner content here: $innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k])); $blockSplit[$k] = '<' . $tagName . ($attribArray['align'] ? ' align="' . htmlspecialchars($attribArray['align']) . '"' : '') . ($attribArray['class'] ? ' class="' . htmlspecialchars($attribArray['class']) . '"' : '') . '>' . $innerContent . '</' . $tagName . '>' . $lastBR; @@ -959,7 +959,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser foreach ($blockSplit as $k => $v) { // If an A-tag was found if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1); + list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true); // If "style" attribute is set and rteerror is not set! 
if ($attribArray['style'] && !$attribArray['rteerror']) { $attribArray_copy['style'] = $attribArray['style']; @@ -1416,7 +1416,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser $rowSplit = $this->splitIntoBlock('tr', $v); foreach ($rowSplit as $k2 => $v2) { if ($k2 % 2) { - $cellSplit = $this->getAllParts($this->splitIntoBlock('td', $v2), 1, 0); + $cellSplit = $this->getAllParts($this->splitIntoBlock('td', $v2)); foreach ($cellSplit as $k3 => $v3) { $tableSplit[$k] .= $v3 . $breakChar; } @@ -1559,7 +1559,7 @@ class RteHtmlParser extends \TYPO3\CMS\Core\Html\HtmlParser foreach ($blockSplit as $k => $v) { // Block if ($k % 2) { - $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1); + list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true); // Checking if there is a scheme, and if not, prepend the current url. // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved... if ($attribArray['href'] !== '') { diff --git a/typo3/sysext/core/Documentation/Changelog/master/Breaking-72667-RTEUnusedInternalMethodsRemoved.rst b/typo3/sysext/core/Documentation/Changelog/master/Breaking-72667-RTEUnusedInternalMethodsRemoved.rst new file mode 100644 index 0000000000000000000000000000000000000000..37b0bcc1dd22e713b883ad150adc7f2092ffc467 --- /dev/null +++ b/typo3/sysext/core/Documentation/Changelog/master/Breaking-72667-RTEUnusedInternalMethodsRemoved.rst @@ -0,0 +1,30 @@ +======================================================= +Breaking: #72667 - RTE: Unused internal methods removed +======================================================= + +Description +=========== + +The HTML parsing features for the Rich Text Editor feature related to the xhtml_cleaning were removed. 
The following now obsolete methods are +removed as well: + +* ``HtmlParser->checkTagTypeCounts()`` +* ``HtmlParser->unprotectTags()`` +* ``HtmlParser->get_tag_attributes_classic()`` +* ``HtmlParser->cleanFontTags()`` +* ``HtmlParser->indentLines()`` + +Additionally, the third parameter for the method ``HtmlParser->getAllParts()`` was removed as well, with the result that the method now always strips +the first and last tag from the tag parts in the result set. + + +Impact +====== + +Calling any of the removed methods will result in a fatal PHP error. + + +Affected Installations +====================== + +Any installation which uses a third-party extension that modifies the HtmlParsing via PHP. \ No newline at end of file