Fonctions membres publiques | |
| parse_charset ($charset) | |
| get_locale_charset ($locale) | |
| conv ($str, $fromCS, $toCS, $useEntityForNoChar=0) | |
| convArray (&$array, $fromCS, $toCS, $useEntityForNoChar=0) | |
| utf8_encode ($str, $charset) | |
| utf8_decode ($str, $charset, $useEntityForNoChar=0) | |
| utf8_to_entities ($str) | |
| entities_to_utf8 ($str, $alsoStdHtmlEnt=0) | |
| utf8_to_numberarray ($str, $convEntities=0, $retChar=0) | |
| UnumberToChar ($cbyte) | |
| utf8CharToUnumber ($str, $hex=0) | |
| initCharset ($charset) | |
| initUnicodeData ($mode=null) | |
| initCaseFolding ($charset) | |
| initToASCII ($charset) | |
| substr ($charset, $string, $start, $len=null) | |
| strlen ($charset, $string) | |
| crop ($charset, $string, $len, $crop='') | |
| strtrunc ($charset, $string, $len) | |
| conv_case ($charset, $string, $case) | |
| specCharsToASCII ($charset, $string) | |
| sb_char_mapping ($str, $charset, $mode, $opt='') | |
| utf8_substr ($str, $start, $len=null) | |
| utf8_strlen ($str) | |
| utf8_strtrunc ($str, $len) | |
| utf8_strpos ($haystack, $needle, $offset=0) | |
| utf8_strrpos ($haystack, $needle) | |
| utf8_char2byte_pos ($str, $pos) | |
| utf8_byte2char_pos ($str, $pos) | |
| utf8_char_mapping ($str, $mode, $opt='') | |
| euc_strtrunc ($str, $len, $charset) | |
| euc_substr ($str, $start, $charset, $len=null) | |
| euc_strlen ($str, $charset) | |
| euc_char2byte_pos ($str, $pos, $charset) | |
| euc_char_mapping ($str, $charset, $mode, $opt='') | |
Attributs publics | |
| $noCharByteVal = 63 | |
| $parsedCharsets = array() | |
| $caseFolding = array() | |
| $toASCII = array() | |
| $twoByteSets | |
| $fourByteSets | |
| $eucBasedSets | |
| $synonyms | |
| $lang_to_script | |
| $script_to_charset_unix | |
| $script_to_charset_windows | |
| $locale_to_charset | |
| $charSetArray | |
| $isoArray | |
| t3lib_cs::parse_charset | ( | $ | charset | ) |
Normalize - changes input character set to lowercase letters.
| string | Input charset |
| t3lib_cs::get_locale_charset | ( | $ | locale | ) |
| t3lib_cs::conv | ( | $ | str, | |
| $ | fromCS, | |||
| $ | toCS, | |||
| $ | useEntityForNoChar = 0 | |||
| ) |
Convert from one charset to another charset.
| string | Input string | |
| string | From charset (the current charset of the string) | |
| string | To charset (the output charset wanted) | |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
| t3lib_cs::convArray | ( | &$ | array, | |
| $ | fromCS, | |||
| $ | toCS, | |||
| $ | useEntityForNoChar = 0 | |||
| ) |
Convert all elements in ARRAY from one charset to another charset. NOTICE: Array is passed by reference!
| array | Input array, possibly multidimensional | |
| string | From charset (the current charset of the string) | |
| string | To charset (the output charset wanted) | |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
| t3lib_cs::utf8_encode | ( | $ | str, | |
| $ | charset | |||
| ) |
Converts $str from $charset to UTF-8
| string | String in local charset to convert to UTF-8 | |
| string | Charset, lowercase. Must be found in csconvtbl/ folder. |
| t3lib_cs::utf8_decode | ( | $ | str, | |
| $ | charset, | |||
| $ | useEntityForNoChar = 0 | |||
| ) |
Converts $str from UTF-8 to $charset
| string | String in UTF-8 to convert to local charset | |
| string | Charset, lowercase. Must be found in csconvtbl/ folder. | |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
| t3lib_cs::utf8_to_entities | ( | $ | str | ) |
Converts all chars > 127 to numeric entities.
| string | Input string |
| t3lib_cs::entities_to_utf8 | ( | $ | str, | |
| $ | alsoStdHtmlEnt = 0 | |||
| ) |
Converts numeric entities (UNICODE, e.g. decimal (&#1234;) or hexadecimal (&#x1b;)) to UTF-8 multibyte chars.
| string | Input string, UTF-8 | |
| boolean | If set, then all string-HTML entities (like &amp; or &pound;) will be converted as well |
| t3lib_cs::utf8_to_numberarray | ( | $ | str, | |
| $ | convEntities = 0, | |||
| $ | retChar = 0 | |||
| ) |
Converts all chars in the input UTF-8 string into integer numbers returned in an array
| string | Input string, UTF-8 | |
| boolean | If set, then all HTML entities (like &amp; or &pound; or &#123; or &#x3f5d;) will be detected as characters. | |
| boolean | If set, then instead of integer numbers the real UTF-8 char is returned. |
| t3lib_cs::UnumberToChar | ( | $ | cbyte | ) |
Converts a UNICODE number to a UTF-8 multibyte character. Algorithm based on the script found at http://czyborra.com/utf/ — unit-tested by Kasper.
The binary representation of the character's integer value is thus simply spread across the bytes and the number of high bits set in the lead byte announces the number of bytes in the multibyte sequence:
| bytes | bits | representation |
| 1 | 7 | 0vvvvvvv |
| 2 | 11 | 110vvvvv 10vvvvvv |
| 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv |
| 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv |
| 5 | 26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv |
| 6 | 31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv |
| integer | UNICODE integer |
| t3lib_cs::utf8CharToUnumber | ( | $ | str, | |
| $ | hex = 0 | |||
| ) |
Converts a UTF-8 multibyte character to a UNICODE number. Unit-tested by Kasper.
| string | UTF-8 multibyte character string | |
| boolean | If set, then a hex. number is returned. |
| t3lib_cs::initCharset | ( | $ | charset | ) |
This will initialize a charset for use if it's defined in the PATH_t3lib.'csconvtbl/' folder. This function is automatically called by the conversion functions.
PLEASE SEE: http://www.unicode.org/Public/MAPPINGS/
| string | The charset to be initialized. Use lowercase charset always (the charset must match exactly with a filename in csconvtbl/ folder: [charset].tbl) |
| t3lib_cs::initUnicodeData | ( | $ | mode = null | ) |
This function initializes all UTF-8 character data tables.
PLEASE SEE: http://www.unicode.org/Public/UNIDATA/
| string | Mode ("case", "ascii", ...) |
| t3lib_cs::initCaseFolding | ( | $ | charset | ) |
This function initializes the folding table for a charset other than UTF-8. This function is automatically called by the case folding functions.
| string | Charset for which to initialize case folding. |
| t3lib_cs::initToASCII | ( | $ | charset | ) |
This function initializes the to-ASCII conversion table for a charset other than UTF-8. This function is automatically called by the ASCII transliteration functions.
| string | Charset for which to initialize conversion. |
| t3lib_cs::substr | ( | $ | charset, | |
| $ | string, | |||
| $ | start, | |||
| $ | len = null | |||
| ) |
Returns a part of a string. Unit-tested by Kasper (single byte charsets only).
| string | The character set | |
| string | Character string | |
| integer | Start position (character position) | |
| integer | Length (in characters) |
| t3lib_cs::strlen | ( | $ | charset, | |
| $ | string | |||
| ) |
Counts the number of characters. Unit-tested by Kasper (single byte charsets only).
| string | The character set | |
| string | Character string |
| t3lib_cs::crop | ( | $ | charset, | |
| $ | string, | |||
| $ | len, | |||
| $ | crop = '' | |||
| ) |
Truncates a string and pre-/appends a string. Unit-tested by Kasper.
| string | The character set | |
| string | Character string | |
| integer | Length (in characters) | |
| string | Crop signifier |
| t3lib_cs::strtrunc | ( | $ | charset, | |
| $ | string, | |||
| $ | len | |||
| ) |
Cuts a string short at a given byte length.
| string | The character set | |
| string | Character string | |
| integer | The byte length |
| t3lib_cs::conv_case | ( | $ | charset, | |
| $ | string, | |||
| $ | case | |||
| ) |
Translates all characters of a string into their respective case values. Unlike strtolower() and strtoupper() this method is locale independent. Note that the string length may change, e.g. lower case German ß (sharp S) becomes upper case "SS". Unit-tested by Kasper. Real case folding is language dependent; this method ignores this fact.
| string | Character set of string | |
| string | Input string to convert case for | |
| string | Case keyword: "toLower" means lowercase conversion, anything else is uppercase (use "toUpper" ) |
| t3lib_cs::specCharsToASCII | ( | $ | charset, | |
| $ | string | |||
| ) |
Converts special chars (like æøå, umlauts etc.) to ASCII equivalents (usually double-bytes, like æ => ae etc.)
| string | Character set of string | |
| string | Input string to convert |
| t3lib_cs::sb_char_mapping | ( | $ | str, | |
| $ | charset, | |||
| $ | mode, | |||
| $ | opt = '' | |||
| ) |
Maps all characters of a string in a single byte charset.
| string | the string | |
| string | the charset | |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) | |
| string | 'case': conversion 'toLower' or 'toUpper' |
| t3lib_cs::utf8_substr | ( | $ | str, | |
| $ | start, | |||
| $ | len = null | |||
| ) |
| t3lib_cs::utf8_strlen | ( | $ | str | ) |
| t3lib_cs::utf8_strtrunc | ( | $ | str, | |
| $ | len | |||
| ) |
Truncates a string in UTF-8 short at a given byte length.
| string | UTF-8 multibyte character string | |
| integer | the byte length |
| t3lib_cs::utf8_strpos | ( | $ | haystack, | |
| $ | needle, | |||
| $ | offset = 0 | |||
| ) |
Find position of first occurrence of a string, both arguments are in UTF-8.
| string | UTF-8 string to search in | |
| string | UTF-8 string to search for | |
| integer | Position to start the search |
| t3lib_cs::utf8_strrpos | ( | $ | haystack, | |
| $ | needle | |||
| ) |
Find position of last occurrence of a char in a string, both arguments are in UTF-8.
| string | UTF-8 string to search in | |
| string | UTF-8 character to search for (single character) |
| t3lib_cs::utf8_char2byte_pos | ( | $ | str, | |
| $ | pos | |||
| ) |
Translates a character position into an 'absolute' byte position. Unit tested by Kasper.
| string | UTF-8 string | |
| integer | Character position (negative values start from the end) |
| t3lib_cs::utf8_byte2char_pos | ( | $ | str, | |
| $ | pos | |||
| ) |
Translates an 'absolute' byte position into a character position. Unit tested by Kasper.
| string | UTF-8 string | |
| integer | byte position |
| t3lib_cs::utf8_char_mapping | ( | $ | str, | |
| $ | mode, | |||
| $ | opt = '' | |||
| ) |
Maps all characters of an UTF-8 string.
| string | UTF-8 string | |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) | |
| string | 'case': conversion 'toLower' or 'toUpper' |
| t3lib_cs::euc_strtrunc | ( | $ | str, | |
| $ | len, | |||
| $ | charset | |||
| ) |
Cuts a string in the EUC charset family short at a given byte length.
| string | EUC multibyte character string | |
| integer | the byte length | |
| string | the charset |
| t3lib_cs::euc_substr | ( | $ | str, | |
| $ | start, | |||
| $ | charset, | |||
| $ | len = null | |||
| ) |
Returns a part of a string in the EUC charset family.
| string | EUC multibyte character string | |
| integer | start position (character position) | |
| string | the charset | |
| integer | length (in characters) |
| t3lib_cs::euc_strlen | ( | $ | str, | |
| $ | charset | |||
| ) |
Counts the number of characters of a string in the EUC charset family.
| string | EUC multibyte character string | |
| string | the charset |
| t3lib_cs::euc_char2byte_pos | ( | $ | str, | |
| $ | pos, | |||
| $ | charset | |||
| ) |
Translates a character position into an 'absolute' byte position.
| string | EUC multibyte character string | |
| integer | character position (negative values start from the end) | |
| string | the charset |
| t3lib_cs::euc_char_mapping | ( | $ | str, | |
| $ | charset, | |||
| $ | mode, | |||
| $ | opt = '' | |||
| ) |
Maps all characters of a string in the EUC charset family.
| string | EUC multibyte character string | |
| string | the charset | |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) | |
| string | 'case': conversion 'toLower' or 'toUpper' |
| t3lib_cs::$noCharByteVal = 63 |
| t3lib_cs::$parsedCharsets = array() |
| t3lib_cs::$caseFolding = array() |
| t3lib_cs::$toASCII = array() |
| t3lib_cs::$twoByteSets |
Valeur initiale :
array(
'ucs-2'=>1, // 2-byte Unicode
)
| t3lib_cs::$fourByteSets |
Valeur initiale :
array(
'ucs-4'=>1, // 4-byte Unicode
'utf-32'=>1, // 4-byte Unicode (limited to the 21-bits of UTF-16)
)
| t3lib_cs::$eucBasedSets |
Valeur initiale :
array(
'gb2312'=>1, // Chinese, simplified.
'big5'=>1, // Chinese, traditional.
'euc-kr'=>1, // Korean
'shift_jis'=>1, // Japanese - WARNING: Shift-JIS includes half-width katakana single-bytes characters above 0x80!
)
| t3lib_cs::$synonyms |
| t3lib_cs::$lang_to_script |
| t3lib_cs::$script_to_charset_unix |
Valeur initiale :
array(
'west_european' => 'iso-8859-1',
'estonian' => 'iso-8859-1',
'east_european' => 'iso-8859-2',
'baltic' => 'iso-8859-4',
'cyrillic' => 'iso-8859-5',
'arabic' => 'iso-8859-6',
'greek' => 'iso-8859-7',
'hebrew' => 'iso-8859-8',
'turkish' => 'iso-8859-9',
'thai' => 'iso-8859-11', // = TIS-620
'lithuanian' => 'iso-8859-13',
'chinese' => 'gb2312', // = euc-cn
'japanese' => 'euc-jp',
'korean' => 'euc-kr',
'simpl_chinese' => 'gb2312',
'trad_chinese' => 'big5',
'vietnamese' => '',
'unicode' => 'utf-8',
)
| t3lib_cs::$script_to_charset_windows |
Valeur initiale :
array(
'east_european' => 'windows-1250',
'cyrillic' => 'windows-1251',
'west_european' => 'windows-1252',
'greek' => 'windows-1253',
'turkish' => 'windows-1254',
'hebrew' => 'windows-1255',
'arabic' => 'windows-1256',
'baltic' => 'windows-1257',
'estonian' => 'windows-1257',
'lithuanian' => 'windows-1257',
'vietnamese' => 'windows-1258',
'thai' => 'cp874',
'korean' => 'cp949',
'chinese' => 'gb2312',
'japanese' => 'shift_jis',
'simpl_chinese' => 'gb2312',
'trad_chinese' => 'big5',
)
| t3lib_cs::$locale_to_charset |
Valeur initiale :
array(
'japanese.euc' => 'euc-jp',
'ja_jp.ujis' => 'euc-jp',
'korean.euc' => 'euc-kr',
'sr@Latn' => 'iso-8859-2',
'zh_cn' => 'gb2312',
'zh_hk' => 'big5',
'zh_tw' => 'big5',
)
| t3lib_cs::$charSetArray |
| t3lib_cs::$isoArray |
Valeur initiale :
array(
'ba' => 'bs',
'br' => 'pt_BR',
'ch' => 'zh_CN',
'cz' => 'cs',
'dk' => 'da',
'si' => 'sl',
'se' => 'sv',
'gl' => 'kl',
'gr' => 'el',
'hk' => 'zh_HK',
'kr' => 'ko',
'ua' => 'uk',
'jp' => 'ja',
'vn' => 'vi',
)
1.5.3