From f58a27c8b2bc8256f3e0b95374e32a19e0167647 Mon Sep 17 00:00:00 2001 From: Jigal van Hemert <jigal.van.hemert@typo3.org> Date: Mon, 14 Oct 2013 10:34:52 +0200 Subject: [PATCH] [BUGFIX] Use callback in preg_replace in RemoveXSS Since PHP 5.5.0 the use of the /e modifier is deprecated in preg_replace. Use callback function instead. Also change comments to CGL format. Change-Id: I44f12e8bfa1c976e494dae847cc6c53d15ed7c2d Fixes: #52771 Releases: 6.0, 6.1, 6.2 Reviewed-on: https://review.typo3.org/24723 Reviewed-by: Stefan Neufeind Tested-by: Stefan Neufeind Reviewed-by: Markus Klein Tested-by: Markus Klein --- typo3/contrib/RemoveXSS/RemoveXSS.php | 64 ++++++++++++++++----------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/typo3/contrib/RemoveXSS/RemoveXSS.php b/typo3/contrib/RemoveXSS/RemoveXSS.php index 4bee4340994b..b1598d0c7558 100644 --- a/typo3/contrib/RemoveXSS/RemoveXSS.php +++ b/typo3/contrib/RemoveXSS/RemoveXSS.php @@ -32,25 +32,37 @@ class RemoveXSS { * @return string Input string with potential XSS code removed */ public static function process($val, $replaceString = '<x>') { - // don't use empty $replaceString because then no XSS-remove will be done + // Don't use empty $replaceString because then no XSS-remove will be done if ($replaceString == '') { $replaceString = '<x>'; } - // remove all non-printable characters. CR(0a) and LF(0b) and TAB(9) are allowed - // this prevents some character re-spacing such as <java\0script> - // note that you have to handle splits with \n, \r, and \t later since they *are* allowed in some inputs + // Remove all non-printable characters. CR(0d) and LF(0a) and TAB(9) are allowed. 
+ // This prevents some character re-spacing such as <java\0script> + // Note that you have to handle splits with \n, \r, and \t later since they *are* allowed in some inputs $val = preg_replace('/([\x00-\x08]|[\x0b-\x0c]|[\x0e-\x19])/', '', $val); - // straight replacements, the user should never need these since they're normal characters - // this prevents like <IMG SRC=&#X40avascript:alert('XSS')> - $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/ie'; - $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/ie'; + // Straight replacements, the user should never need these since they're normal characters. 
+ // This prevents like <IMG SRC=&#X40avascript:alert('XSS')> + $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/i'; + $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/i'; while (preg_match($searchHexEncodings, $val) || preg_match($searchUnicodeEncodings, $val)) { - $val = preg_replace($searchHexEncodings, "chr(hexdec('\\1'))", $val); - $val = preg_replace($searchUnicodeEncodings, "chr('\\1')", $val); + $val = preg_replace_callback( + $searchHexEncodings, + function ($matches) { + return chr(hexdec($matches[1])); + }, + $val + ); + $val = preg_replace_callback( + $searchUnicodeEncodings, + function ($matches) { + return chr($matches[1]); + }, + $val + ); } - // now the only remaining whitespace attacks are \t, \n, and \r + // Now the only remaining whitespace attacks are \t, \n, and \r $ra1 = array('javascript', 'vbscript', 'expression', 'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'video', 'audio', 'track', 'canvas', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', @@ -86,13 +98,13 @@ class RemoveXSS { 'onsuspend', 'ontimeupdate', 'onundo', 'onunload', 'onvolumechange', 'onwaiting'); $ra_protocol = array('javascript', 'vbscript', 'expression'); - //remove the potential &#xxx; stuff for testing + // Remove the potential &#xxx; stuff for testing $val2 = 
preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b);?)*\s*/i', '', $val); $ra = array(); foreach ($ra1 as $ra1word) { - // stripos is faster than the regular expressions used later and because the words we're looking for only have - // chars < 0x80 we can use the non-multibyte safe version + // Stripos is faster than the regular expressions used later and because the words we're looking for only have + // chars < 0x80 we can use the non-multibyte safe version. if (stripos($val2, $ra1word ) !== FALSE ) { //keep list of potential words that were found if (in_array($ra1word, $ra_protocol, TRUE)) { @@ -104,13 +116,13 @@ class RemoveXSS { if (in_array($ra1word, $ra_attribute, TRUE)) { $ra[] = array($ra1word, 'ra_attribute'); } - //some keywords appear in more than one array - //these get multiple entries in $ra, each with the appropriate type + // Some keywords appear in more than one array. + // These get multiple entries in $ra, each with the appropriate type } } - //only process potential words + // Only process potential words if (count($ra) > 0) { - // keep replacing as long as the previous round replaced something + // Keep replacing as long as the previous round replaced something $found = TRUE; while ($found == TRUE) { $val_before = $val; @@ -122,29 +134,29 @@ class RemoveXSS { } $pattern .= $ra[$i][0][$j]; } - //handle each type a little different (extra conditions to prevent false positives a bit better) + // Handle each type a little different (extra conditions to prevent false positives a bit better) switch ($ra[$i][1]) { case 'ra_protocol': - //these take the form of e.g. 'javascript:' + // These take the form of e.g. 'javascript:' $pattern .= '((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*(?=:)'; break; case 'ra_tag': - //these take the form of e.g. '<SCRIPT[^\da-z] ....'; + // These take the form of e.g. '<SCRIPT[^\da-z] ....'; $pattern = '(?<=<)' . $pattern . 
'((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*(?=[^\da-z])'; break; case 'ra_attribute': - //these take the form of e.g. 'onload=' Beware that a lot of characters are allowed - //between the attribute and the equal sign! + // These take the form of e.g. 'onload=' Beware that a lot of characters are allowed + // between the attribute and the equal sign! $pattern .= '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)'; break; } $pattern = '/' . $pattern . '/i'; - // add in <x> to nerf the tag + // Add in <x> to nerf the tag $replacement = substr_replace($ra[$i][0], $replaceString, 2, 0); - // filter out the hex tags + // Filter out the hex tags $val = preg_replace($pattern, $replacement, $val); if ($val_before == $val) { - // no replacements were made, so exit the loop + // No replacements were made, so exit the loop $found = FALSE; } } -- GitLab