diff --git a/t3lib/class.t3lib_cs.php b/t3lib/class.t3lib_cs.php index a49a9ea2795a5ea75a38352538d978a012323539..712c5006d602c135959c4b13fd247815aa9f2196 100644 --- a/t3lib/class.t3lib_cs.php +++ b/t3lib/class.t3lib_cs.php @@ -572,7 +572,7 @@ class t3lib_cs { if (TYPO3_OS == 'WIN') { $cs = $this->script_to_charset_windows[$script] ? $this->script_to_charset_windows[$script] : 'windows-1252'; } else { - $cs = $this->script_to_charset_unix[$script] ? $this->script_to_charset_unix[$script] : 'iso-8859-1'; + $cs = $this->script_to_charset_unix[$script] ? $this->script_to_charset_unix[$script] : 'utf-8'; } return $cs; @@ -814,26 +814,32 @@ class t3lib_cs { * @param boolean If set, then all string-HTML entities (like & or £ will be converted as well) * @return string Output string */ - function entities_to_utf8($str, $alsoStdHtmlEnt = 0) { + function entities_to_utf8($str, $alsoStdHtmlEnt = FALSE) { if ($alsoStdHtmlEnt) { - $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES)); // Getting them in iso-8859-1 - but thats ok since this is observed below. + $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8')); } $token = md5(microtime()); $parts = explode($token, preg_replace('/(&([#[:alnum:]]*);)/', $token . '${2}' . $token, $str)); foreach ($parts as $k => $v) { - if ($k % 2) { - if (substr($v, 0, 1) == '#') { // Dec or hex entities: - if (substr($v, 1, 1) == 'x') { - $parts[$k] = $this->UnumberToChar(hexdec(substr($v, 2))); - } else { - $parts[$k] = $this->UnumberToChar(substr($v, 1)); - } - } elseif ($alsoStdHtmlEnt && $trans_tbl['&' . $v . ';']) { // Other entities: - $parts[$k] = $this->utf8_encode($trans_tbl['&' . $v . ';'], 'iso-8859-1'); - } else { // No conversion: - $parts[$k] = '&' . $v . ';'; + // only take every second element + if ($k % 2 === 0) { + continue; + } + + $position = 0; + if (substr($v, $position, 1) == '#') { // Dec or hex entities: + $position++; + if (substr($v, $position, 1) == 'x') { + $v = hexdec(substr($v, ++$position)); + } else { + $v = substr($v, $position); } + $parts[$k] = $this->UnumberToChar($v); + } elseif ($alsoStdHtmlEnt && isset($trans_tbl['&' . $v . ';'])) { // Other entities: + $parts[$k] = $trans_tbl['&' . $v . ';']; + } else { // No conversion: + $parts[$k] = '&' . $v . ';'; } } @@ -2346,4 +2352,4 @@ if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLA include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']); } -?> \ No newline at end of file +?> diff --git a/t3lib/class.t3lib_div.php b/t3lib/class.t3lib_div.php index 86186d8f7c28c0dfdcda353470f81a15e177ab94..67d6be15b7e3a90ef3ed70c4e2fb8b8f70807b23 100644 --- a/t3lib/class.t3lib_div.php +++ b/t3lib/class.t3lib_div.php @@ -375,7 +375,7 @@ final class t3lib_div { } else { // this case should not happen $csConvObj = self::makeInstance('t3lib_cs'); - return $csConvObj->crop('iso-8859-1', $string, $chars, $appendString); + return $csConvObj->crop('utf-8', $string, $chars, $appendString); } } @@ -4189,7 +4189,7 @@ final class t3lib_div { if (@is_file($fileRef) && $langKey) { // Set charsets: - $sourceCharset = $csConvObj->parse_charset($csConvObj->charSetArray[$langKey] ? $csConvObj->charSetArray[$langKey] : 'iso-8859-1'); + $sourceCharset = $csConvObj->parse_charset($csConvObj->charSetArray[$langKey] ? $csConvObj->charSetArray[$langKey] : 'utf-8'); if ($charset) { $targetCharset = $csConvObj->parse_charset($charset); } else { @@ -4216,9 +4216,9 @@ final class t3lib_div { // converting the default language (English) // this needs to be done for a few accented loan words and extension names - if (is_array($LOCAL_LANG['default']) && $targetCharset != 'iso-8859-1') { + if (is_array($LOCAL_LANG['default']) && $targetCharset != 'utf-8') { foreach ($LOCAL_LANG['default'] as &$labelValue) { - $labelValue = $csConvObj->conv($labelValue, 'iso-8859-1', $targetCharset); + $labelValue = $csConvObj->conv($labelValue, 'utf-8', $targetCharset); } unset($labelValue); } @@ -5190,7 +5190,7 @@ final class t3lib_div { * @param string $charset Charset used for encoding * @return string The encoded string */ - public static function encodeHeader($line, $enc = 'quoted-printable', $charset = 'iso-8859-1') { + public static function encodeHeader($line, $enc = 'quoted-printable', $charset = 'utf-8') { // Avoid problems if "###" is found in $line (would conflict with the placeholder which is used below) if (strpos($line, '###') !== FALSE) { return $line; diff --git a/t3lib/l10n/parser/class.t3lib_l10n_parser_llphp.php b/t3lib/l10n/parser/class.t3lib_l10n_parser_llphp.php index 953f495520a0b3c44d119e623e8c7e54df4f2d02..497c7ac35aea67161e942951e5821151c824ebe5 100644 --- a/t3lib/l10n/parser/class.t3lib_l10n_parser_llphp.php +++ b/t3lib/l10n/parser/class.t3lib_l10n_parser_llphp.php @@ -140,9 +140,9 @@ class t3lib_l10n_parser_Llphp implements t3lib_l10n_parser { // Converting the default language (English) // This needs to be done for a few accented loan words and extension names - if (is_array($LOCAL_LANG['default']) && $this->targetCharset !== 'iso-8859-1') { + if (is_array($LOCAL_LANG['default']) && $this->targetCharset !== 'utf-8') { foreach ($LOCAL_LANG['default'] as &$labelValue) { - $labelValue = $this->csConvObj->conv($labelValue, 'iso-8859-1', $this->targetCharset); + $labelValue = $this->csConvObj->conv($labelValue, 'utf-8', $this->targetCharset); } unset($labelValue); } @@ -211,7 +211,7 @@ class t3lib_l10n_parser_Llphp implements t3lib_l10n_parser { */ protected function setCharsets($languageKey, $charset) { $this->sourceCharset = $this->csConvObj->parse_charset($this->csConvObj->charSetArray[$languageKey] - ? $this->csConvObj->charSetArray[$languageKey] : 'iso-8859-1'); + ? $this->csConvObj->charSetArray[$languageKey] : 'utf-8'); if ($charset) { $this->targetCharset = $this->csConvObj->parse_charset($charset); } else { diff --git a/tests/t3lib/class.t3lib_pagerendererTest.php b/tests/t3lib/class.t3lib_pagerendererTest.php index 60290cac2cc43531475301bec5e740c758e50e2b..775b4bc0aa565c66e86ec5c34312a8b4f9a9effb 100644 --- a/tests/t3lib/class.t3lib_pagerendererTest.php +++ b/tests/t3lib/class.t3lib_pagerendererTest.php @@ -766,7 +766,7 @@ class t3lib_PageRendererTest extends tx_phpunit_testcase { public function isInlineLanguageLabelDeliveredWithNonUTF8() { $testPrefix = uniqid('test'); $this->fixture->loadExtCore(); - $this->fixture->setCharSet('iso-8859-1'); + $this->fixture->setCharSet('utf-8'); $this->fixture->addInlineLanguageLabel($testPrefix, $testPrefix . "_\xd8"); $out = $this->fixture->render(); diff --git a/typo3/sysext/cms/tslib/class.tslib_fe.php b/typo3/sysext/cms/tslib/class.tslib_fe.php index d5f2fd51989981fc3a338b9371fbd52f28d5b822..446f0f98893ad34666566c65688444a8609ddbeb 100644 --- a/typo3/sysext/cms/tslib/class.tslib_fe.php +++ b/typo3/sysext/cms/tslib/class.tslib_fe.php @@ -213,7 +213,7 @@ * @var t3lib_cs */ var $csConvObj; - var $defaultCharSet = 'iso-8859-1'; // The default charset used in the frontend if nothing else is set. + var $defaultCharSet = 'utf-8'; // The default charset used in the frontend if nothing else is set. var $renderCharset=''; // Internal charset of the frontend during rendering. (Default: UTF-8) var $metaCharset=''; // Output charset of the websites content. This is the charset found in the header, meta tag etc. If different from $renderCharset a conversion happens before output to browser. Defaults to ->renderCharset if not set. var $localeCharset=''; // Assumed charset of locale strings. @@ -4788,7 +4788,7 @@ if (version == "n3") { /** * Converts the charset of the input string if applicable. - * The "to" charset is determined by the currently used charset for the page which is "iso-8859-1" by default or set by $GLOBALS['TSFE']->config['config']['renderCharset'] + * The "to" charset is determined by the currently used charset for the page which is "utf-8" by default or set by $GLOBALS['TSFE']->config['config']['renderCharset'] * Only if there is a difference between the two charsets will a conversion be made * The conversion is done real-time - no caching for performance at this point! * diff --git a/typo3/sysext/impexp/class.tx_impexp.php b/typo3/sysext/impexp/class.tx_impexp.php index 76265715249166ab4db970e9ecb5ba33e35bd8ee..c67a1459cb3ab17792340b746de5d42a43191dd7 100755 --- a/typo3/sysext/impexp/class.tx_impexp.php +++ b/typo3/sysext/impexp/class.tx_impexp.php @@ -965,7 +965,7 @@ class tx_impexp { ); // Creating XML file from $outputArray: - $charset = $this->dat['header']['charset'] ? $this->dat['header']['charset'] : 'iso-8859-1'; + $charset = $this->dat['header']['charset'] ? $this->dat['header']['charset'] : 'utf-8'; $XML = '<?xml version="1.0" encoding="'.$charset.'" standalone="yes" ?>'.LF; $XML.= t3lib_div::array2xml($this->dat,'',0,'T3RecordDocument',0,$options); diff --git a/typo3/sysext/indexed_search/class.external_parser.php b/typo3/sysext/indexed_search/class.external_parser.php index b37f0450a9d72ad534a79e58f38ea9f622eed33b..118d237f0ff12e8cd1d270401e2c3d5f7230448c 100755 --- a/typo3/sysext/indexed_search/class.external_parser.php +++ b/typo3/sysext/indexed_search/class.external_parser.php @@ -493,8 +493,9 @@ class tx_indexed_search_extparse { case 'txt': case 'csv': // Raw text $content = t3lib_div::getUrl($absFile); - // TODO: Auto-registration of charset???? -> utf-8 (Current assuming western europe...) - $content = $this->pObj->convertHTMLToUtf8($content, 'iso-8859-1'); + // TODO: Implement auto detection of charset (currently assuming utf-8) + $contentCharset = 'utf-8'; + $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset); $contentArr = $this->pObj->splitRegularContent($content); $contentArr['title'] = basename($absFile); // Make sure the title doesn't expose the absolute path! break; diff --git a/typo3/sysext/rtehtmlarea/class.tx_rtehtmlarea_base.php b/typo3/sysext/rtehtmlarea/class.tx_rtehtmlarea_base.php index 7172785d3fef5172bb1aead6d92afd782d43138e..74c12c17e747d4377fadb50fd18aa73d7180337f 100644 --- a/typo3/sysext/rtehtmlarea/class.tx_rtehtmlarea_base.php +++ b/typo3/sysext/rtehtmlarea/class.tx_rtehtmlarea_base.php @@ -328,7 +328,7 @@ class tx_rtehtmlarea_base extends t3lib_rteapi { $this->OutputCharset = $this->charset; $this->contentCharset = $LANG->csConvObj->charSetArray[$this->contentTypo3Language]; - $this->contentCharset = $this->contentCharset ? $this->contentCharset : 'iso-8859-1'; + $this->contentCharset = $this->contentCharset ? $this->contentCharset : 'utf-8'; $this->origContentCharSet = $this->contentCharset; $this->contentCharset = 'utf-8'; diff --git a/typo3/sysext/rtehtmlarea/mod3/class.tx_rtehtmlarea_browse_links.php b/typo3/sysext/rtehtmlarea/mod3/class.tx_rtehtmlarea_browse_links.php index 4012ae025f31da098add4b8d85ec74810d3e224f..e8ea0291c7291826f13cfeeeab83cdaa872fd42e 100644 --- a/typo3/sysext/rtehtmlarea/mod3/class.tx_rtehtmlarea_browse_links.php +++ b/typo3/sysext/rtehtmlarea/mod3/class.tx_rtehtmlarea_browse_links.php @@ -1139,7 +1139,7 @@ class tx_rtehtmlarea_browse_links extends browse_links { $LANG->lang = $this->contentTypo3Language; $LANG->origCharSet = $LANG->csConvObj->charSetArray[$this->contentTypo3Language]; - $LANG->origCharSet = $LANG->origCharSet ? $LANG->origCharSet : 'iso-8859-1'; + $LANG->origCharSet = $LANG->origCharSet ? $LANG->origCharSet : 'utf-8'; $LANG->charSet = $this->contentTypo3Charset; $LLString = $LANG->sL($string); diff --git a/typo3/sysext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php b/typo3/sysext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php index 82a0656ef6307a84aeb5ee1e7e8f97919e8b9f9c..20d03aecd8f06b0c8ba59fe894e9e8a70cd50630 100644 --- a/typo3/sysext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php +++ b/typo3/sysext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php @@ -78,7 +78,7 @@ class tx_rtehtmlarea_parse_html { $clientInfo = t3lib_div::clientInfo(); // the charset of the content element, possibly overidden by forceCharset - $toCharSet = t3lib_div::_GP('charset')?t3lib_div::_GP('charset'):'iso-8859-1'; + $toCharSet = t3lib_div::_GP('charset')?t3lib_div::_GP('charset'):'utf-8'; // IE wants it back in utf-8 if ( $clientInfo['BROWSER']= 'msie') { $toCharSet = 'utf-8'; diff --git a/typo3/sysext/rtehtmlarea/pi2/class.tx_rtehtmlarea_pi2.php b/typo3/sysext/rtehtmlarea/pi2/class.tx_rtehtmlarea_pi2.php index 9de5fe366996c57a613028469114dd630580791c..87edeeccf8b4e4f389dfcada5e633dffe6a2a3bd 100644 --- a/typo3/sysext/rtehtmlarea/pi2/class.tx_rtehtmlarea_pi2.php +++ b/typo3/sysext/rtehtmlarea/pi2/class.tx_rtehtmlarea_pi2.php @@ -169,7 +169,7 @@ class tx_rtehtmlarea_pi2 extends tx_rtehtmlarea_base { // Set the charset of the content $this->contentCharset = $TSFE->csConvObj->charSetArray[$this->contentTypo3Language]; - $this->contentCharset = $this->contentCharset ? $this->contentCharset : 'iso-8859-1'; + $this->contentCharset = $this->contentCharset ? $this->contentCharset : 'utf-8'; $this->contentCharset = trim($TSFE->config['config']['metaCharset']) ? trim($TSFE->config['config']['metaCharset']) : $this->contentCharset; /* ======================================= diff --git a/typo3/sysext/t3editor/res/tsref/tsref.xml b/typo3/sysext/t3editor/res/tsref/tsref.xml index 866497b064c439f2dd28f8e04f9715ea52c9297e..d42e4ecc5dd914a88c8cf402bf07c27c97cb79b4 100644 --- a/typo3/sysext/t3editor/res/tsref/tsref.xml +++ b/typo3/sysext/t3editor/res/tsref/tsref.xml @@ -774,7 +774,7 @@ If this property is set, images are not allowed to be scaled up in size. This pa </property> <property name="notification_email_charset" type="string"> <description><![CDATA[Alternative charset for the notification mails.]]></description> - <default><![CDATA[ISO-8859-1]]></default> + <default><![CDATA[utf-8]]></default> </property> <property name="notification_email_encoding" type="string"> <description><![CDATA[This sets the encoding of plaintext emails (notification messages). The default encoding is "quoted-printable". But setting this to eg. "base64" will encode the content with base64 encoding. diff --git a/typo3/template.php b/typo3/template.php index 42116aa35d4dffd428e783ddb7f76e653ca90799..8b64af318bf625f53b09a1fbdc3a92f8e581e337 100644 --- a/typo3/template.php +++ b/typo3/template.php @@ -128,7 +128,7 @@ class template { var $parseTimeFlag = 0; // Will output the parsetime of the scripts in milliseconds (for admin-users). Set this to FALSE when releasing TYPO3. Only for dev. // INTERNAL - var $charset = 'iso-8859-1'; // Default charset. see function initCharset() + var $charset = 'utf-8'; // Default charset. see function initCharset() var $sectionFlag=0; // Internal: Indicates if a <div>-output section is open var $divClass = ''; // (Default) Class for wrapping <DIV>-tag of page. Is set in class extensions.