Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
| Total | |
100.00% |
1 / 1 |
|
100.00% |
36 / 36 |
CRAP | |
100.00% |
200 / 200 |
| String | |
100.00% |
1 / 1 |
|
100.00% |
36 / 36 |
129 | |
100.00% |
200 / 200 |
| isUtf8Sequence($seq) | |
100.00% |
1 / 1 |
34 | |
100.00% |
20 / 20 |
|||
| fixCodePageString($str, $encoding) | |
100.00% |
1 / 1 |
6 | |
100.00% |
18 / 18 |
|||
| fixString($str, $toDecode=self::DECODE_NONE, $encoding="UTF-8") | |
100.00% |
1 / 1 |
5 | |
100.00% |
22 / 22 |
|||
| unsafeSubstr($start, $length) | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| __construct($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8") | |
100.00% |
1 / 1 |
3 | |
100.00% |
5 / 5 |
|||
| create($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8") | |
100.00% |
1 / 1 |
3 | |
100.00% |
6 / 6 |
|||
| getCollator() | |
100.00% |
1 / 1 |
2 | |
100.00% |
4 / 4 |
|||
| setCollator(\Collator $coll) | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| __get($name) | |
100.00% |
1 / 1 |
3 | |
100.00% |
3 / 3 |
|||
| __toString() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| equals(\Scrivo\String $str) | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| getLength() | |
100.00% |
1 / 1 |
2 | |
100.00% |
4 / 4 |
|||
| count() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| current() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| key() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| next() | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| rewind() | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| valid() | |
100.00% |
1 / 1 |
2 | |
100.00% |
1 / 1 |
|||
| offsetSet($offset, $value) | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| offsetGet($offset) | |
100.00% |
1 / 1 |
2 | |
100.00% |
4 / 4 |
|||
| offsetExists($offset) | |
100.00% |
1 / 1 |
2 | |
100.00% |
1 / 1 |
|||
| offsetUnset($offset) | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| substr($start, $length=0xFFFF) | |
100.00% |
1 / 1 |
3 | |
100.00% |
5 / 5 |
|||
| substring($start, $end) | |
100.00% |
1 / 1 |
4 | |
100.00% |
5 / 5 |
|||
| trim() | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| contains(\Scrivo\String $str, $offset=0, $ignoreCase=false) | |
100.00% |
1 / 1 |
4 | |
100.00% |
7 / 7 |
|||
| indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) | |
100.00% |
1 / 1 |
5 | |
100.00% |
9 / 9 |
|||
| lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) | |
100.00% |
1 / 1 |
6 | |
100.00% |
12 / 12 |
|||
| firstOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false) | |
100.00% |
1 / 1 |
4 | |
100.00% |
9 / 9 |
|||
| lastOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false) | |
100.00% |
1 / 1 |
4 | |
100.00% |
9 / 9 |
|||
| replace($from, $to) | |
100.00% |
1 / 1 |
13 | |
100.00% |
21 / 21 |
|||
| split(\Scrivo\String $delimiter, $limit=0) | |
100.00% |
1 / 1 |
4 | |
100.00% |
9 / 9 |
|||
| toLowerCase() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| toUpperCase() | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| compareTo(\Scrivo\String $str) | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| inArray($arr) | |
100.00% |
1 / 1 |
3 | |
100.00% |
5 / 5 |
|||
| <?php | |
| /* Copyright (c) 2012, Geert Bergman (geert@scrivo.nl) | |
| * All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright notice, | |
| * this list of conditions and the following disclaimer. | |
| * 2. Redistributions in binary form must reproduce the above copyright notice, | |
| * this list of conditions and the following disclaimer in the documentation | |
| * and/or other materials provided with the distribution. | |
| * 3. Neither the name of "Scrivo" nor the names of its contributors may be | |
| * used to endorse or promote products derived from this software without | |
| * specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| * POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| * $Id: String.php 841 2013-08-19 22:19:47Z geert $ | |
| */ | |
| /** | |
| * Implementation of the \Scrivo\String class. | |
| */ | |
| namespace Scrivo; | |
| /** | |
| * Wrapper class for PHP strings to enforce consistent and safe multi-byte | |
| * (UTF-8) string handling. | |
| * | |
| * \Scrivo\String is a primitive wrapper class for PHP strings to make sure that | |
| * all operations performed on the string are UTF-8 safe. As PHP does not | |
| * enforce a consistent way to deal with multibyte strings we do it | |
| * ourselves. In the Scrivo code base UTF-8 is the only encoding that is | |
| * supported for operations on data and these operations should be done | |
| * through instances of the \Scrivo\String class. If strings are used as byte | |
| * arrays, use the ByteArray class. | |
| * | |
| * \Scrivo\String objects are imutable: once created you can't change them. All | |
| * operations on a \Scrivo\String object will return a new \Scrivo\String object. | |
| * | |
| * Although we'll be working with UTF-8 exclusively it is possible to create | |
| * \Scrivo\String objects that contain characters from 8 byte encoding schemes. | |
| * Also a note on HTML entities, we work with UTF-8 so you don't need them: | |
| * they are evil. Except entities for the reserved HTML characters (<>&'") | |
| * there is really no use for them in UTF-8 strings. And when stored in a | |
| * database only cause sorting and lookup errors. Therefore when construction | |
| * \Scrivo\String objects you can opt to convert existing HTML entities to their | |
| * corresonding UTF-8 characters. | |
| * | |
| * The current locale setting for LC_COLLATE is important. | |
| * \Scrivo\String::compareTo() will use this setting when comparing strings. | |
| * | |
| * Please note: you might be tempted to do string comparison using | |
| * equality operators (==). Although this works in most cases don't do this: | |
| * you'll do PHP object comparison (i.e. comparing a | |
| * \Scrivo\String object) and that is not what you want: use \Scrivo\String::equals() | |
| * or \Scrivo\String::compareTo() to compare strings. | |
| */ | |
| class String implements \Iterator, \ArrayAccess, \Countable { | |
| /** | |
| * Constant to denote ISO-8859-1 encoding. This is the default encoding | |
| * for \Scrivo\String uses for fixing and comparing. | |
| */ | |
| const ENC_ISO_8859_1 = "ISO-8859-1"; | |
| /** | |
| * Constant to denote CP-1251 encoding. | |
| */ | |
| const ENC_CP_1251 = "CP-1251"; | |
| /** | |
| * Constant to indicate that you don't want to decode any entities when | |
| * constructing the string. | |
| */ | |
| const DECODE_NONE = 0; | |
| /** | |
| * Constant to indicate that you want to decode all entities when | |
| * constructing the string. | |
| */ | |
| const DECODE_ALL = 1; | |
| /** | |
| * Constant to indicate that you want to decode all but the entities for | |
| * reserved characters (&<>'") when constructing the string. | |
| */ | |
| const DECODE_UNRESERVED = 2; | |
| /** | |
| * The primitive UTF-8 string. | |
| * @var string | |
| */ | |
| private $str; | |
| /** | |
| * The current position when iterating. | |
| * @var string | |
| */ | |
| private $pos; | |
| /** | |
| * The length of the string (characters not bytes). | |
| * @var int | |
| */ | |
| private $len = -1; | |
| /** | |
| * Collator used for sorting. This is a static shared amongst instances. | |
| * @var \Collator | |
| */ | |
| private static $coll; | |
| /** | |
| * Map to translate 8 byte code page characters to UTF-8 sequences. | |
| * @var array[] | |
| */ | |
| private static $maps = array( | |
| self::ENC_ISO_8859_1 => array(128 => | |
| "€","�","‚","ƒ","„","…","†","‡","ˆ","‰","Š","‹","Œ","�","Ž","�", | |
| "�","‘","’","“","”","•","–","—","˜","™","š","›","œ","�","ž","Ÿ", | |
| " ","¡","¢","£","¤","¥","¦","§","¨","©","ª","«","¬","","®","¯", | |
| "°","±","²","³","´","µ","¶","·","¸","¹","º","»","¼","½","¾","¿", | |
| "À","Á","Â","Ã","Ä","Å","Æ","Ç","È","É","Ê","Ë","Ì","Í","Î","Ï", | |
| "Ð","Ñ","Ò","Ó","Ô","Õ","Ö","×","Ø","Ù","Ú","Û","Ü","Ý","Þ","ß", | |
| "à","á","â","ã","ä","å","æ","ç","è","é","ê","ë","ì","í","î","ï", | |
| "ð","ñ","ò","ó","ô","õ","ö","÷","ø","ù","ú","û","ü","ý","þ","ÿ", | |
| ), | |
| self::ENC_CP_1251 => array(128 => | |
| "Ђ","Ѓ","‚","ѓ","„","…","†","‡","€","‰","Љ","‹","Њ","Ќ","Ћ","Џ", | |
| "ђ","‘","’","“","”","•","–","—","�","™","љ","›","њ","ќ","ћ","џ", | |
| " ","Ў","ў","Ј","¤","Ґ","¦","§","Ё","©","Є","«","¬","","®","Ї", | |
| "°","±","І","і","ґ","µ","¶","·","ё","№","є","»","ј","Ѕ","ѕ","ї", | |
| "А","Б","В","Г","Д","Е","Ж","З","И","Й","К","Л","М","Н","О","П", | |
| "Р","С","Т","У","Ф","Х","Ц","Ч","Ш","Щ","Ъ","Ы","Ь","Э","Ю","Я", | |
| "а","б","в","г","д","е","ж","з","и","й","к","л","м","н","о","п", | |
| "р","с","т","у","ф","х","ц","ч","ш","щ","ъ","ы","ь","э","ю","я", | |
| ), | |
| ); | |
| /** | |
| * Test if a given byte sequence is a valid UTF-8 sequence. | |
| * | |
| * If the tested byte sequence is a valid UTF-8 sequence the method | |
| * returns the length of the sequence, else the method returns 0. | |
| * | |
| * @param string $seq The byte sequence to test. | |
| * | |
| * @return int The length of the UTF-8 sequence (2-4) or 0 if the | |
| * sequence is not an UTF-8 sequence. | |
| */ | |
| private function isUtf8Sequence($seq) { | |
| // check if the first byte is a UTF-8 marker and if not end it here | |
| $b1 = ord($seq[0]); | |
| if ($b1<0xC2 || $b1>=0xF5) { | |
| return 0; | |
| } | |
| // get the length to prevent overshooting when reading | |
| $len = strlen($seq); | |
| // Get the 2nd and 3rd byte and test it, note that for a valid UTF-8 | |
| // sequence we only allow the byte (here byte three) after the | |
| // sequence to be none, an ascii character, or a new UTF-8 marker | |
| // (which is more limiting than not to allow continuation bytes | |
| // (b3 < 0x80 && b3 >= OxBF) only). | |
| $b2 = ($len>1) ? ord($seq[1]) : 0; | |
| $b3 = ($len>2) ? ord($seq[2]) : 0; | |
| if ($b1>=0xC2 && $b1<0xE0 && $b2>=0x80 && $b2<0xC0 | |
| && ($b3<0x80 || ($b3>=0xC2 && $b3<0xF5))) { | |
| return 2; | |
| } | |
| // We're not there, test for a 3 byte byte sequence. See the comment | |
| // above on testing the 4th byte. | |
| $b4 = ($len>3) ? ord($seq[3]) : 0; | |
| if ($b1>=0xE0 && $b1<0xF0 | |
| && $b2>=0x80 && $b2<0xC0 && $b3>=0x80 && $b3<0xC0 | |
| && ($b4<0x80 || ($b4>=0xC2 && $b4<0xF5))) { | |
| return 3; | |
| } | |
| // We're not there, test for a 3 byte byte sequence. See the comment | |
| // above on testing the 5th byte. Also note that b2 is restricted | |
| // so that we keep in the <= U+10FFFF range | |
| $b5 = ($len>4) ? ord($seq[4]) : 0; | |
| if ($b1>=0xF0 && $b1<0xF5 && $b2>=0x80 && $b2<0xC0 | |
| && $b3>=0x80 && $b3<0xC0 && $b4>=0x80 && $b4<0xC0 | |
| && ($b5<0x80 || ($b5>=0xC2 && $b5<0xF5))) { | |
| return 4; | |
| } | |
| // This is not a valid UTF-8 sequence. | |
| return 0; | |
| } | |
| /** | |
| * Convert a string with UTF-8 and code page characters to a valid UTF-8 | |
| * string. | |
| * | |
| * When converting the input string to UTF-8 all bytes in the 0x80-0xFF | |
| * range are first tested if they are is a valid UTF-8 byte sequences, if | |
| * not it is assumed that it is an 8 byte code page character and | |
| * converted according to the given encoding. Supported encodings are: | |
| * | |
| * * Utf8string::ENC_ISO_8859_1 | |
| * * Utf8string::ENC_CP_1251 | |
| * | |
| * @param string $str The string with mixed UTF-8 and and 8 byte code | |
| * page characters. | |
| * @param string $encoding The encoding to use when converting 8 byte code | |
| * page characters to UTF-8. | |
| * | |
| * @return string A valid UTF-8 string. | |
| */ | |
| private function fixCodePageString($str, $encoding) { | |
| // set the encoding | |
| if ($encoding != self::ENC_ISO_8859_1 | |
| && $encoding != self::ENC_CP_1251) { | |
| throw | |
| new \Scrivo\SystemException("Unsupported encoding: $encoding"); | |
| } | |
| // Split the data on any occurance of a byte with the high bit set | |
| $parts = | |
| preg_split('/[\x80-\xFF]/', $str, -1, PREG_SPLIT_OFFSET_CAPTURE); | |
| // See if there's anything to do | |
| $c = count($parts); | |
| if ($c<=1) { | |
| return $str; | |
| } | |
| // Start with the first part | |
| $out = $parts[0][0]; | |
| for ($i=1; $i<$c; $i++) { | |
| // Get a 6 byte sequence on a split location ... | |
| $seq = substr($str, $parts[$i][1]-1, 6); | |
| // ... and check if is a valid UTF-8 byte sequence, ... | |
| $utf8_seq_width = $this->isUtf8Sequence($seq); | |
| if ($utf8_seq_width) { | |
| // ... if so add it to output ... | |
| $res = substr($seq, 0, $utf8_seq_width); | |
| // ... and jump over the parts. | |
| $i += ($utf8_seq_width - 1); | |
| } else { | |
| // ... else treat it as a codepage character | |
| $res = self::$maps[$encoding][ord($seq[0])]; | |
| } | |
| // add the UTF-8 character and next part to the output | |
| $out .= $res.$parts[$i][0]; | |
| } | |
| return $out; | |
| } | |
| /** | |
| * Convert a string with HTML entities, UTF-8 and code page characters | |
| * to a valid UTF-8 string. | |
| * | |
| * When converting the input string to UTF-8 all bytes in the 0x80-0xFF | |
| * range are first tested if they are is a valid UTF-8 byte sequences, if | |
| * not it is assumed that it is an 8 byte code page character and | |
| * converted according to the given encoding. Supported encodings are: | |
| * | |
| * * Utf8string::ENC_ISO_8859_1 | |
| * * Utf8string::ENC_CP_1251 | |
| * | |
| * You can opt to convert HTML entities in the string to their | |
| * corresponding characters. Possible choices are: | |
| * | |
| * * Utf8string::DECODE_NONE don't decode HTML entities | |
| * * Utf8string::DECODE_ALL, decode all HTML entities; | |
| * * Utf8string::DECODE_UNRESERVED, decode all but the HTML entities | |
| * for <>&' and ' (HTML/XML) | |
| * | |
| * @param string $str The source string, a possible mixture of HTML | |
| * entities, UTF-8 and code page characters. | |
| * @param int $toDecode Which entities | |
| * @param string $encoding The encoding to use when converting 8 byte code | |
| * page characters to UTF-8. | |
| * | |
| * @return string A valid UTF-8 string. | |
| */ | |
| private function fixString($str, $toDecode=self::DECODE_NONE, | |
| $encoding="UTF-8") { | |
| // List of HTML-entities we want to keep. | |
| $reserved = array( | |
| "<", ">", "&", """, "'", | |
| "<", ">", "&", """, "'", | |
| "<",">", "&", """, "'" | |
| ); | |
| // List of HTML-entity markers to replace the ones you want to | |
| // keep, so html_entity_decode will leave them alone. | |
| $save = array( | |
| "#*@lt!;", "#*@gt!;", "#*@amp!;", "#*@quot!;", "#*@#039!;", | |
| "#*@lt!;", "#*@gt!;", "#*@amp!;", "#*@quot!;", "#*@#039!;", | |
| "#*@lt!;", "#*@gt!;", "#*@amp!;", "#*@quot!;", "#*@#039!;" | |
| ); | |
| if ($toDecode == self::DECODE_UNRESERVED) { | |
| // 'Save' entities for reserved characters. | |
| $str = str_replace($reserved, $save, $str); | |
| } | |
| if ($encoding != "UTF-8") { | |
| // Fix characters that are not properly UTF-8 encoded | |
| $str = $this->fixCodePageString($str, $encoding); | |
| } | |
| if ($toDecode != self::DECODE_NONE) { | |
| // Change all entities to their corresponding UTF-8 characters. | |
| $str = html_entity_decode($str, ENT_QUOTES, "UTF-8"); | |
| } | |
| if ($toDecode == self::DECODE_UNRESERVED) { | |
| // 'Restore' previously saved entities. | |
| $str = str_replace(array_slice($save, 0, 5), | |
| array_slice($reserved, 0, 5), $str); | |
| } | |
| return $str; | |
| } | |
| /** | |
| * Get a substring from a string without first checking the boundaries. | |
| * | |
| * @param int $start Start offset for the substring, use a negative number | |
| * to use an offset from the end of the string. | |
| * @param int $length The length of the substring. | |
| * | |
| * @return \Scrivo\String The requested portion of this string. | |
| */ | |
| private function unsafeSubstr($start, $length) { | |
| return new \Scrivo\String(mb_substr($this->str, $start, $length, "UTF-8")); | |
| } | |
| /** | |
| * Construct an \Scrivo\String. | |
| * | |
| * You can either construct an \Scrivo\String object from a valid UTF-8 string, | |
| * or from a string that you expect not to contain valid UTF-8 data. In the | |
| * latter case use the $toDecode and/or $encoding parameters. | |
| * | |
| * Possible choices for $toDecode are: | |
| * | |
| * * Utf8string::DECODE_NONE don't decode HTML entities | |
| * * Utf8string::DECODE_ALL, decode all HTML entities; | |
| * * Utf8string::DECODE_UNRESERVED, decode all but the HTML entities | |
| * for <>&' and ' (HTML/XML) | |
| * | |
| * If you expect that the source string contains 8 byte code page character | |
| * then you can select the encoding to use to convert them to their | |
| * corresponding UTF-8 characters. Supported encodings are: | |
| * | |
| * * Utf8string::ENC_ISO_8859_1 | |
| * * Utf8string::ENC_CP_1251 | |
| * | |
| * Note: typical use of the $toDecode and $encoding parameters is when | |
| * you want to 'sanitize' data before you store it into a database. Setting | |
| * these parameters start CPU intensive procedures so it's best not to use | |
| * them in bluk operations (like that inner loop or slashdotted home page). | |
| * And remember when all data was safely stored as UTF-8, there will be | |
| * no need to 'sanitize' it before displaying. | |
| * | |
| * @param string $str The source string, a possible mixture of HTML | |
| * entities, UTF-8 and code page characters. | |
| * @param int $toDecode Which entities | |
| * @param string $encoding The encoding to use when converting 8 byte code | |
| * page characters to UTF-8. | |
| */ | |
| public function __construct($str="", $toDecode=self::DECODE_NONE, | |
| $encoding="UTF-8") { | |
| $str = (string)$str; | |
| $this->str = $toDecode==self::DECODE_NONE && $encoding=="UTF-8" ? $str | |
| : $this->fixString($str, $toDecode, $encoding); | |
| $this->pos = 0; | |
| } | |
| /** | |
| * Factory method to construct an \Scrivo\String. | |
| * | |
| * @see \Scrivo\String::__construct() | |
| * | |
| * @param string $str The string to create the wrapper for. It is assumed | |
| * that this will be a valid UTF-8 string. If this is not the case, | |
| * you'll need to set the additional parameters. | |
| * @param int $toDecode Which entities | |
| * @param string $encoding The encoding to use when converting 8 byte code | |
| * | |
| * @return \Scrivo\String|\Scrivo\String An \Scrivo\String wrapper object. | |
| */ | |
| public static function create($str="", $toDecode=self::DECODE_NONE, | |
| $encoding="UTF-8") { | |
| if (is_array($str)) { | |
| foreach($str as $k=>$v) { | |
| $str[$k] = self::create($v, $toDecode, $encoding); | |
| } | |
| return $str; | |
| } | |
| return new \Scrivo\String($str, $toDecode, $encoding); | |
| } | |
| /** | |
| * Get the collator for sorting strings. | |
| * | |
| * @return \Collator The currently set collator for the \Scrivo\String | |
| * class. | |
| */ | |
| public static function getCollator() { | |
| if (!self::$coll) { | |
| self::$coll = new \Collator(\Locale::getDefault()); | |
| } | |
| return self::$coll; | |
| } | |
| /** | |
| * Set the collator for sorting strings. | |
| * | |
| * @param \Collator $coll The collator to use. | |
| */ | |
| public static function setCollator(\Collator $coll) { | |
| self::$coll = $coll; | |
| } | |
| /** | |
| * Implementation of the readable properties using the PHP magic | |
| * method __get(). | |
| * | |
| * @param string $name The name of the property to get. | |
| * | |
| * @return mixed The value of the requested property. | |
| */ | |
| public function __get($name) { | |
| switch($name) { | |
| case "length": return $this->getLength(); | |
| case "collator": return self::getCollator(); | |
| } | |
| throw new \Scrivo\SystemException("No such property '$name'."); | |
| } | |
| /** | |
| * Return the primitive UTF-8 string for this instance. | |
| * | |
| * @return string The primitive UTF-8 string for this instance. | |
| */ | |
| public function __toString() { | |
| return $this->str; | |
| } | |
| /** | |
| * Test if this string equals another \Scrivo\String object. | |
| * | |
| * When you want test \Scrivo\String object for equality, use this method | |
| * and never the equality operator (==) because then you'll compare | |
| * objects and therefore all data members of \Scrivo\String and this can | |
| * give you other results (or cast the \Scrivo\String strings to PHP strings | |
| * before comparing). | |
| * | |
| * @param \Scrivo\String $str The string to compare this string to. | |
| * | |
| * @return boolean True if the given string equals this string. | |
| */ | |
| public function equals(\Scrivo\String $str) { | |
| return (string)$this->str == (string)$str; | |
| } | |
| /** | |
| * Get the length of the string. | |
| * | |
| * @return int The length of the string in characters (not bytes). | |
| */ | |
| public function getLength() { | |
| if ($this->len == -1) { | |
| $this->len = mb_strlen($this->str, "UTF-8"); | |
| } | |
| return $this->len; | |
| } | |
| /** | |
| * Return the character count of the string. | |
| * | |
| * This is an alias for getLength() and part of the implementation of | |
| * Countable. | |
| * | |
| * @return int The length of the string in characters. | |
| */ | |
| public function count() { | |
| return $this->getLength(); | |
| } | |
| /** | |
| * Return the current UTF-8 character when iterating. | |
| * | |
| * Note that this method is part of the implementation of Iterator and | |
| * should not be called from an other context. | |
| * | |
| * @return string The current UTF-8 character in this string when | |
| * iterating. | |
| */ | |
| public function current() { | |
| // note: iterator will call valid() before current(). | |
| return $this->unsafeSubstr($this->pos, 1); | |
| } | |
| /** | |
| * Return the index of the current UTF-8 character when iterating. | |
| * | |
| * Note that this method is part of the implementation of Iterator and | |
| * should not be called from an other context. | |
| * | |
| * @return int The index of the current UTF-8 character in this string | |
| * when iterating. | |
| */ | |
| public function key() { | |
| return $this->pos; | |
| } | |
| /** | |
| * Move forward in this string to the next UTF-8 character when iterating. | |
| * | |
| * Note that this method is part of the implementation of Iterator and | |
| * should not be called from an other context. | |
| */ | |
| public function next() { | |
| $this->pos++; | |
| } | |
| /** | |
| * Reset the current character index so iterating will (re)start at the | |
| * beginning of this string. | |
| * | |
| * Note that this method is part of the implementation of Iterator and | |
| * should not be called from an other context. | |
| */ | |
| public function rewind() { | |
| $this->pos = 0; | |
| } | |
| /** | |
| * Check if the current character index for iterating is valid. | |
| * | |
| * Note that this method is part of the implementation of Iterator and | |
| * should not be called from an other context. | |
| * | |
| * @return boolean True if the current character index is valid else false. | |
| */ | |
| public function valid() { | |
| return ($this->pos >= 0 && $this->pos < $this->getLength()); | |
| } | |
| /** | |
| * Illegal method: set a character at a specified index location. | |
| * | |
| * Note that this method is part of the implementation of ArrayAccess. | |
| * \Scrivo\Strings are immutable and therefore it is prohibited to set | |
| * elements (characters) in a string, so this method implementation is | |
| * not relevant and throws an exception if called. | |
| * | |
| * @param int $offset | |
| * @param string $value | |
| * | |
| * @throws \Scrivo\SystemException If this method is called. | |
| */ | |
| public function offsetSet($offset, $value) { | |
| throw new \Scrivo\SystemException( | |
| "offsetSet can't be called on \Scrivo\String objects"); | |
| } | |
| /** | |
| * Get an UTF-8 character from a string using array brackets. | |
| * | |
| * Note that this method is part of the implementation of ArrayAccess and | |
| * should not be called from an other context. | |
| * | |
| * @param int $offset A character offet in the string. | |
| * | |
| * @throws \Scrivo\SystemException If the requested offset was out of range. | |
| */ | |
| public function offsetGet($offset) { | |
| if (!$this->offsetExists($offset)) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$offset] out of bounds"); | |
| } | |
| return $this->unsafeSubstr($offset, 1); | |
| } | |
| /** | |
| * Check if the specified index location in this string is valid. | |
| * | |
| * Note that this method is part of the implementation of ArrayAccess and | |
| * should not be called from an other context. | |
| * | |
| * @param int $offset A character offet in the string. | |
| * | |
| * @return boolean True if the specified in index is within the valid range. | |
| */ | |
| public function offsetExists($offset) { | |
| return ($offset >= 0 && $offset < $this->getLength()); | |
| } | |
| /** | |
| * Illegal method: unset a character at a specified index location. | |
| * | |
| * Note that this method is part of the implementation of ArrayAccess. | |
| * \Scrivo\Strings are immutable and therefore it is prohibited to unset | |
| * elements (characters) in a string, so this method implementation is | |
| * not relevant and throws an exception if called. | |
| * | |
| * @param int $offset | |
| * | |
| * @throws \Scrivo\SystemException If this method is called. | |
| */ | |
| public function offsetUnset($offset) { | |
| throw new \Scrivo\SystemException( | |
| "offsetUnset can't be called on \Scrivo\String objects"); | |
| } | |
| /** | |
| * Get a substring from a string using an offset and a length. | |
| * | |
| * Just like PHP's native substr function this method returns a substring | |
| * from this string using an offset and a length. But note that this | |
| * method will throw an exception if the offset is invalid. | |
| * | |
| * @param int $start Start offset for the substring, use a negative number | |
| * to use an offset from the end of the string. | |
| * @param int $length The length of the substring. | |
| * | |
| * @return \Scrivo\String The portion of this string specified by the $start | |
| * and $length parameter. | |
| * | |
| * @throws \Scrivo\SystemException if the requested offset was out of range. | |
| */ | |
| public function substr($start, $length=0xFFFF) { | |
| $tmp = $start < 1 ? -$start : $start; | |
| if (!$this->offsetExists($tmp)) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$start] out of bounds"); | |
| } | |
| return $this->unsafeSubstr($start, $length); | |
| } | |
| /** | |
| * Get a substring from a string using a start and end index. | |
| * | |
| * This method is inspired by it's JAVA counterpart and returns a | |
| * substring of this string using an start and end index. | |
| * | |
| * @param int $start Start offset for the substring. | |
| * @param int $end The end offset for the substring. | |
| * | |
| * @return \Scrivo\String The portion of this string specified by the $start | |
| * and $end parameter. | |
| * | |
| * @throws \Scrivo\SystemException if the requested offset was out of range. | |
| */ | |
| public function substring($start, $end) { | |
| if (!$this->offsetExists($start) || !$this->offsetExists($end) | |
| || $start > $end) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$start, $end] out of bounds"); | |
| } | |
| return $this->unsafeSubstr($start, $end-$start); | |
| } | |
| /** | |
| * Get a trimmed copy of this string. | |
| * | |
| * Returns a copy of the string, with leading and trailing whitespace | |
| * removed. Whitespace characters are: ' ', \t, \r, \n, the character | |
| * for a non breaking space. | |
| * | |
| * @return \Scrivo\String A copy of this string with leading and trailing | |
| * white space removed. | |
| */ | |
| public function trim() { | |
| return new \Scrivo\String( | |
| preg_replace("/(^[\s ]+)|([\s ]+$)/us", "", $this->str)); | |
| } | |
| /** | |
| * Check if the string contains the given substring. | |
| * | |
| * This is the test you normally use strpos(...) !== false for. | |
| * | |
| * @param \Scrivo\String $str The string to search for. | |
| * @param int $offset An offset from where to start the search. | |
| * @param boolean $ignoreCase Set to perform an case insensitive lookup. | |
| * | |
| * @return boolean True if the given string is contained by this string. | |
| * | |
| * @throws \Scrivo\SystemException If the $offset is out of range. | |
| */ | |
| public function contains(\Scrivo\String $str, $offset=0, $ignoreCase=false) { | |
| if ($offset && !$this->offsetExists($offset)) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$offset] out of bounds"); | |
| } | |
| if ($ignoreCase) { | |
| return mb_stripos( | |
| $this->str, (string)$str, $offset, "UTF-8") !== false; | |
| } else { | |
| // binary is ok to do | |
| return strpos($this->str, (string)$str, $offset) !== false; | |
| } | |
| } | |
| /** | |
| * Returns the index of the given substring in this string. | |
| * | |
| * Just like the PHP's native strpos and stripos functions this method | |
| * returns the index of a substring in this string. But there are two | |
| * important differences: this method returns -1 if the substring was | |
| * not found, and this method will raise an exception if the given | |
| * offset was out of range. | |
| * | |
| * @param \Scrivo\String $str The string to search for. | |
| * @param int $offset An offset from where to start the search. | |
| * @param boolean $ignoreCase Set to perform an case insensitive lookup. | |
| * | |
| * @return int The index of the first occurance of the substring after | |
| * $offset and -1 if the substring was not found. | |
| * | |
| * @throws \Scrivo\SystemException If the $offset is out of range. | |
| */ | |
| public function indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) { | |
| if ($offset && !$this->offsetExists($offset)) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$offset] out of bounds"); | |
| } | |
| $res = -1; | |
| if ($ignoreCase) { | |
| $res = mb_stripos($this->str, $str, $offset, "UTF-8"); | |
| } else { | |
| $res = mb_strpos($this->str, $str, $offset, "UTF-8"); | |
| } | |
| return $res !== false ? $res : -1; | |
| } | |
| /** | |
| * Returns the index of the last occurance of the given substring in this | |
| * string. | |
| * | |
| * Just like the PHP's native strrpos and strripos functions this method | |
| * returns the substring of this string that start with the first occurance | |
| * of the given a substring in this string. But note that this | |
| * method will throw an exception if the offset is invalid. | |
| * Also an negative offset to indicate an offset measured from the end | |
| * of the string is allowed. But there are two important differences: | |
| * this method returns -1 if the substring was not found, and this method | |
| * will raise an exception if the given offset was out of range. | |
| * | |
| * @param \Scrivo\String $str The string to search for. | |
| * @param int $offset An offset from where to start the search. A positive | |
| * value indicates an offset measured from the start of the string, a | |
| * negative value from the end of the string. | |
| * @param boolean $ignoreCase Perform an case insensitive lookup. | |
| * | |
| * @return int The index of the last occurance of the substring after | |
| * $offset. | |
| * @throws \Scrivo\SystemException If the $offset is out of range. | |
| */ | |
| public function lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) { | |
| if ($offset) { | |
| $tmp = $offset < 1 ? -$offset : $offset; | |
| if (!$this->offsetExists($tmp)) { | |
| throw new \Scrivo\SystemException( | |
| "String index [$offset] out of bounds"); | |
| } | |
| } | |
| $res = -1; | |
| if ($ignoreCase) { | |
| $res = mb_strripos($this->str, $str, $offset, "UTF-8"); | |
| } else { | |
| $res = mb_strrpos($this->str, $str, $offset, "UTF-8"); | |
| } | |
| return $res !== false ? $res : -1; | |
| } | |
| /** | |
| * Returns the first occurance of a given substring in this string. | |
| * | |
| * Just like the PHP's native strstr and stristr functions this method | |
| * returns the substring of this string that start with the first occurance | |
| * of the given a substring in this string. Note that this method throws | |
| * an exception if an empty string was given as search string and not | |
| * a warning as strstr does. | |
| * | |
| * @param \Scrivo\String $str The string to search for. | |
| * @param int $part Flag to indicate to return the part of the string | |
| * before the first occurance of the given substring i.o. the part | |
| * after the substring. | |
| * @param boolean $ignoreCase Perform an case insensitive lookup. | |
| * | |
| * @return \Scrivo\String The substring plus the part of the string after the | |
| * the first occurance of the substring, or the part of the string before | |
| * the first occurance of the substring (excluding the substring) or NULL | |
| * if not found. | |
| * | |
| * @throws \Scrivo\SystemException If an empty search string was given. | |
| */ | |
| public function firstOccurranceOf(\Scrivo\String $str, $part=false, | |
| $ignoreCase=false) { | |
| if (!$str->getLength()) { | |
| throw new \Scrivo\SystemException( | |
| "firstOccurranceOf requires a search string"); | |
| } | |
| $res = NULL; | |
| if ($ignoreCase) { | |
| $res = mb_stristr($this->str, $str, $part, "UTF-8"); | |
| } else { | |
| $res = mb_strstr($this->str, $str, $part, "UTF-8"); | |
| } | |
| return $res !== false ? new \Scrivo\String($res) : NULL; | |
| } | |
| /** | |
| * Returns the last occurance of a given character in this string. | |
| * | |
| * Just like the PHP's native strrchr and strrichr functions this method | |
| * returns the substring of this string that start with the first occurance | |
| * of the given a substring in this string. Note that this method throws | |
| * an exception if an empty string was given as search string and not | |
| * a warning as strstr does. | |
| * | |
| * @param \Scrivo\String $str The character to search for. | |
| * @param int $part Flag to indicate to return part of the string before | |
| * the last occurance of the given character i.o. the part after the | |
| * character. | |
| * @param boolean $ignoreCase Perform an case insensitive lookup. | |
| * | |
| * @return \Scrivo\String The substring plus the part of the string after the | |
| * the last occurance of the character, or the part of the string before | |
| * the last occurance of the character (excluding the character) or NULL | |
| * if not found. | |
| * | |
| * @throws \Scrivo\SystemException If a search string of not exactly one | |
| * character in length was given. | |
| */ | |
| public function lastOccurranceOf(\Scrivo\String $str, $part=false, | |
| $ignoreCase=false) { | |
| if ($str->getLength() != 1) { | |
| throw new \Scrivo\SystemException( | |
| "lastOccurranceOf accepts single charaters only"); | |
| } | |
| $res = NULL; | |
| if ($ignoreCase) { | |
| $res = mb_strrichr($this->str, $str, $part, "UTF-8"); | |
| } else { | |
| $res = mb_strrchr($this->str, $str, $part, "UTF-8"); | |
| } | |
| return $res !== false ? new \Scrivo\String($res) : NULL; | |
| } | |
| /** | |
| * Replace a substring or set of substrings in this string. | |
| * | |
| * You can use this method in favour of PHP's native str_replace and strtr | |
| * functions. This method will do proper type checking for you. | |
| * | |
| * @param \Scrivo\String|\Scrivo\String[] $from A (set of) string(s) to replace | |
| * in this string. | |
| * @param \Scrivo\String|\Scrivo\String[] $to A (set of) replacement string(s) to | |
| * replace the found string(s). | |
| * | |
| * @return \Scrivo\String A string with the replaced values. | |
| * | |
| * @throws \Scrivo\SystemException If the input data is not of type | |
| * \Scrivo\String or \Scrivo\String[], of if the $to parameter is an array | |
| * and $from isn't or hasn't the same number of elements. | |
| */ | |
| public function replace($from, $to) { | |
| if ($from instanceof \Scrivo\String && $to instanceof \Scrivo\String) { | |
| return new \Scrivo\String(str_replace($from, $to, $this->str)); | |
| } else if (is_array($from) && $to instanceof \Scrivo\String) { | |
| foreach ($from as $k=>$v) { | |
| if (!($v instanceof \Scrivo\String)) { | |
| throw new \Scrivo\SystemException("From element is" | |
| . " not an \Scrivo\String as array position [$k]"); | |
| } | |
| } | |
| return new \Scrivo\String(str_replace($from, $to, $this->str)); | |
| } else if (is_array($from) && is_array($to)) { | |
| if (count($from) != count($to)) { | |
| throw new \Scrivo\SystemException( | |
| "Input arrays are not the same size"); | |
| } | |
| foreach ($from as $k=>$v) { | |
| if (!($v instanceof \Scrivo\String) | |
| || !($to[$k] instanceof \Scrivo\String)) { | |
| throw new \Scrivo\SystemException("To or from element is" | |
| . " not an \Scrivo\String as array position [$k]"); | |
| } | |
| } | |
| return new \Scrivo\String(str_replace($from, $to, $this->str)); | |
| } | |
| throw new \Scrivo\SystemException("Invalid argument types"); | |
| } | |
| /** | |
| * Split this string using a delimiter. | |
| * | |
| * Just like PHP's native explode this method splits a string on | |
| * boundaries formed by the string delimiter. Note that the behavoir | |
| * of the limit parameter is a little bit different and that this method | |
| * will throw an exception if an empty string is passed as a delimiter. | |
| * | |
| * @param \Scrivo\String $delimiter The boundary string. | |
| * @param int $limit If limit is set and positive, the returned array | |
| * will contain a maximum of limit elements with the last element | |
| * containing the rest of string. If the limit parameter is negative, | |
| * all components except the last -limit are returned. If the limit is | |
| * not set or 0 no limit wil be used. | |
| * | |
| * @return \Scrivo\String[] An array of strings created by splitting the | |
| * string parameter on boundaries formed by the delimiter. If the | |
| * delimiter was not found and array containing a copy of this string | |
| * will be returned except if limit was negative, in that case an | |
| * empty array will be returned. | |
| * | |
| * @throws \Scrivo\SystemException If an empty search string was given. | |
| */ | |
| public function split(\Scrivo\String $delimiter, $limit=0) { | |
| if ($delimiter == "") { | |
| throw new \Scrivo\SystemException( | |
| "split cannot use an empty \"\" delimiter."); | |
| } | |
| $r = $limit ? explode($delimiter, $this->str, $limit) | |
| : explode($delimiter, $this->str); | |
| foreach ($r as $k=>$v) { | |
| $r[$k] = new \Scrivo\String($v); | |
| } | |
| return $r; | |
| } | |
| /** | |
| * Get a copy of this string with all of its characters converted to lower | |
| * case. | |
| * | |
| * @return \Scrivo\String A string containing only lower case characters. | |
| */ | |
| public function toLowerCase() { | |
| return new \Scrivo\String(mb_strtolower($this->str, "UTF-8")); | |
| } | |
| /** | |
| * Get a copy of this string with all of its characters converted to upper | |
| * case. | |
| * | |
| * @return \Scrivo\String A string containing only upper case characters. | |
| */ | |
| public function toUpperCase() { | |
| return new \Scrivo\String(mb_strtoupper($this->str, "UTF-8")); | |
| } | |
| /** | |
| * Compare this string to another \Scrivo\String object. | |
| * | |
| * Note that this method requires the \Scrivo\String collator to be set, | |
| * else the method falls back to the default locale for creating a | |
| * collator and generates a warning. | |
| * | |
| * @param \Scrivo\String $str The string to compare this string to. | |
| * | |
| * @return int Less than 0 if this string is less than the given | |
| * string $str; more than 0 if this string is greater than $str, and | |
| * 0 if they are equal. | |
| */ | |
| public function compareTo(\Scrivo\String $str) { | |
| return self::getCollator()->compare($this->str, $str); | |
| } | |
| /** | |
| * Check if this string exists an array of \Scrivo\String-s. | |
| * | |
| * @param \Scrivo\String $arr The array to search. | |
| * | |
| * @return mixed If found the key of the first occurance of the string | |
| * in the array, else null. | |
| */ | |
| public function inArray($arr) { | |
| foreach ($arr as $k=>$v) { | |
| if ($v->equals($this)) { | |
| return $k; | |
| } | |
| } | |
| return null; | |
| } | |
| } | |