Code Coverage for /home/www/scrivo/Scrivo/scrivo/Scrivo/String.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total		100.00%	1 / 1		100.00%	36 / 36	CRAP		100.00%	200 / 200
String		100.00%	1 / 1		100.00%	36 / 36	129		100.00%	200 / 200
isUtf8Sequence($seq)					100.00%	1 / 1	34		100.00%	20 / 20
fixCodePageString($str, $encoding)					100.00%	1 / 1	6		100.00%	18 / 18
fixString($str, $toDecode=self::DECODE_NONE, $encoding="UTF-8")					100.00%	1 / 1	5		100.00%	22 / 22
unsafeSubstr($start, $length)					100.00%	1 / 1	1		100.00%	1 / 1
__construct($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8")					100.00%	1 / 1	3		100.00%	5 / 5
create($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8")					100.00%	1 / 1	3		100.00%	6 / 6
getCollator()					100.00%	1 / 1	2		100.00%	4 / 4
setCollator(\Collator $coll)					100.00%	1 / 1	1		100.00%	2 / 2
__get($name)					100.00%	1 / 1	3		100.00%	3 / 3
__toString()					100.00%	1 / 1	1		100.00%	1 / 1
equals(\Scrivo\String $str)					100.00%	1 / 1	1		100.00%	1 / 1
getLength()					100.00%	1 / 1	2		100.00%	4 / 4
count()					100.00%	1 / 1	1		100.00%	1 / 1
current()					100.00%	1 / 1	1		100.00%	1 / 1
key()					100.00%	1 / 1	1		100.00%	1 / 1
next()					100.00%	1 / 1	1		100.00%	2 / 2
rewind()					100.00%	1 / 1	1		100.00%	2 / 2
valid()					100.00%	1 / 1	2		100.00%	1 / 1
offsetSet($offset, $value)					100.00%	1 / 1	1		100.00%	2 / 2
offsetGet($offset)					100.00%	1 / 1	2		100.00%	4 / 4
offsetExists($offset)					100.00%	1 / 1	2		100.00%	1 / 1
offsetUnset($offset)					100.00%	1 / 1	1		100.00%	2 / 2
substr($start, $length=0xFFFF)					100.00%	1 / 1	3		100.00%	5 / 5
substring($start, $end)					100.00%	1 / 1	4		100.00%	5 / 5
trim()					100.00%	1 / 1	1		100.00%	2 / 2
contains(\Scrivo\String $str, $offset=0, $ignoreCase=false)					100.00%	1 / 1	4		100.00%	7 / 7
indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false)					100.00%	1 / 1	5		100.00%	9 / 9
lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false)					100.00%	1 / 1	6		100.00%	12 / 12
firstOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false)					100.00%	1 / 1	4		100.00%	9 / 9
lastOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false)					100.00%	1 / 1	4		100.00%	9 / 9
replace($from, $to)					100.00%	1 / 1	13		100.00%	21 / 21
split(\Scrivo\String $delimiter, $limit=0)					100.00%	1 / 1	4		100.00%	9 / 9
toLowerCase()					100.00%	1 / 1	1		100.00%	1 / 1
toUpperCase()					100.00%	1 / 1	1		100.00%	1 / 1
compareTo(\Scrivo\String $str)					100.00%	1 / 1	1		100.00%	1 / 1
inArray($arr)					100.00%	1 / 1	3		100.00%	5 / 5

1	<?php
2	/* Copyright (c) 2012, Geert Bergman (geert@scrivo.nl)
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright notice,
9	* this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright notice,
11	* this list of conditions and the following disclaimer in the documentation
12	* and/or other materials provided with the distribution.
13	* 3. Neither the name of "Scrivo" nor the names of its contributors may be
14	* used to endorse or promote products derived from this software without
15	* specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
21	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27	* POSSIBILITY OF SUCH DAMAGE.
28	*
29	* $Id: String.php 841 2013-08-19 22:19:47Z geert $
30	*/
31
32	/**
33	* Implementation of the \Scrivo\String class.
34	*/
35
36	namespace Scrivo;
37
38	/**
39	* Wrapper class for PHP strings to enforce consistent and safe multi-byte
40	* (UTF-8) string handling.
41	*
42	* \Scrivo\String is a primitive wrapper class for PHP strings to make sure that
43	* all operations performed on the string are UTF-8 safe. As PHP does not
44	* enforce a consistent way to deal with multibyte strings we do it
45	* ourselves. In the Scrivo code base UTF-8 is the only encoding that is
46	* supported for operations on data and these operations should be done
47	* through instances of the \Scrivo\String class. If strings are used as byte
48	* arrays, use the ByteArray class.
49	*
50	* \Scrivo\String objects are immutable: once created you can't change them. All
51	* operations on a \Scrivo\String object will return a new \Scrivo\String object.
52	*
53	* Although we'll be working with UTF-8 exclusively it is possible to create
54	* \Scrivo\String objects that contain characters from 8-bit encoding schemes.
55	* Also a note on HTML entities, we work with UTF-8 so you don't need them:
56	* they are evil. Except entities for the reserved HTML characters (<>&'")
57	* there is really no use for them in UTF-8 strings. And when stored in a
58	* database they only cause sorting and lookup errors. Therefore when constructing
59	* \Scrivo\String objects you can opt to convert existing HTML entities to their
60	* corresponding UTF-8 characters.
61	*
62	* The current locale setting for LC_COLLATE is important.
63	* \Scrivo\String::compareTo() will use this setting when comparing strings.
64	*
65	* Please note: you might be tempted to do string comparison using
66	* equality operators (==). Although this works in most cases don't do this:
67	* you'll do PHP object comparison (i.e. comparing a
68	* \Scrivo\String object) and that is not what you want: use \Scrivo\String::equals()
69	* or \Scrivo\String::compareTo() to compare strings.
70	*/
71	class String implements \Iterator, \ArrayAccess, \Countable {
72
73	/**
74	* Constant to denote ISO-8859-1 encoding. This is the default encoding
75	* for \Scrivo\String uses for fixing and comparing.
76	*/
77	const ENC_ISO_8859_1 = "ISO-8859-1";
78
79	/**
80	* Constant to denote CP-1251 encoding.
81	*/
82	const ENC_CP_1251 = "CP-1251";
83
84	/**
85	* Constant to indicate that you don't want to decode any entities when
86	* constructing the string.
87	*/
88	const DECODE_NONE = 0;
89
90	/**
91	* Constant to indicate that you want to decode all entities when
92	* constructing the string.
93	*/
94	const DECODE_ALL = 1;
95
96	/**
97	* Constant to indicate that you want to decode all but the entities for
98	* reserved characters (&<>'") when constructing the string.
99	*/
100	const DECODE_UNRESERVED = 2;
101
102	/**
103	* The primitive UTF-8 string.
104	* @var string
105	*/
106	private $str;
107
108	/**
109	* The current position when iterating.
110	* @var int
111	*/
112	private $pos;
113
114	/**
115	* The length of the string (characters not bytes).
116	* @var int
117	*/
118	private $len = -1;
119
120	/**
121	* Collator used for sorting. This is a static shared amongst instances.
122	* @var \Collator
123	*/
124	private static $coll;
125
126	/**
127	* Map to translate 8-bit code page characters to UTF-8 sequences.
128	* @var array[]
129	*/
130	private static $maps = array(
131	self::ENC_ISO_8859_1 => array(128 =>
132	"€","�","‚","ƒ","„","…","†","‡","ˆ","‰","Š","‹","Œ","�","Ž","�",
133	"�","‘","’","“","”","•","–","—","˜","™","š","›","œ","�","ž","Ÿ",
134	" ","¡","¢","£","¤","¥","¦","§","¨","©","ª","«","¬","","®","¯",
135	"°","±","²","³","´","µ","¶","·","¸","¹","º","»","¼","½","¾","¿",
136	"À","Á","Â","Ã","Ä","Å","Æ","Ç","È","É","Ê","Ë","Ì","Í","Î","Ï",
137	"Ð","Ñ","Ò","Ó","Ô","Õ","Ö","×","Ø","Ù","Ú","Û","Ü","Ý","Þ","ß",
138	"à","á","â","ã","ä","å","æ","ç","è","é","ê","ë","ì","í","î","ï",
139	"ð","ñ","ò","ó","ô","õ","ö","÷","ø","ù","ú","û","ü","ý","þ","ÿ",
140	),
141	self::ENC_CP_1251 => array(128 =>
142	"Ђ","Ѓ","‚","ѓ","„","…","†","‡","€","‰","Љ","‹","Њ","Ќ","Ћ","Џ",
143	"ђ","‘","’","“","”","•","–","—","�","™","љ","›","њ","ќ","ћ","џ",
144	" ","Ў","ў","Ј","¤","Ґ","¦","§","Ё","©","Є","«","¬","","®","Ї",
145	"°","±","І","і","ґ","µ","¶","·","ё","№","є","»","ј","Ѕ","ѕ","ї",
146	"А","Б","В","Г","Д","Е","Ж","З","И","Й","К","Л","М","Н","О","П",
147	"Р","С","Т","У","Ф","Х","Ц","Ч","Ш","Щ","Ъ","Ы","Ь","Э","Ю","Я",
148	"а","б","в","г","д","е","ж","з","и","й","к","л","м","н","о","п",
149	"р","с","т","у","ф","х","ц","ч","ш","щ","ъ","ы","ь","э","ю","я",
150	),
151	);
152
/**
 * Test if a byte string starts with a valid multi-byte UTF-8 sequence.
 *
 * Only multi-byte sequences are recognised (lead byte 0xC2-0xF4); a
 * string starting with an ASCII byte yields 0. Besides the sequence
 * itself, the byte that directly follows it is inspected too: it must be
 * absent, an ASCII byte (< 0x80) or a new UTF-8 lead byte (0xC2-0xF4).
 * That is stricter than merely refusing a stray continuation byte and
 * helps to tell real UTF-8 apart from runs of code page characters.
 *
 * Sequences that are well-formed on the bit level but not valid UTF-8
 * according to RFC 3629 are rejected as well: overlong 3 and 4 byte
 * forms, UTF-16 surrogates (U+D800-U+DFFF) and code points above
 * U+10FFFF.
 *
 * @param string $seq The byte sequence to test.
 *
 * @return int The length of the UTF-8 sequence (2-4) or 0 if the
 *    sequence is not a valid multi-byte UTF-8 sequence.
 */
private function isUtf8Sequence($seq) {

	$seq = (string)$seq;
	$len = strlen($seq);

	// Nothing to inspect: avoid reading offset 0 of an empty string.
	if ($len === 0) {
		return 0;
	}

	// Read up to five bytes, using 0 for bytes past the end of the input
	// so that "no byte" passes the follow-up byte test as ASCII would.
	$b = array();
	for ($i = 0; $i < 5; $i++) {
		$b[$i] = ($i < $len) ? ord($seq[$i]) : 0;
	}

	// The lead byte determines the width of the sequence; 0xC0, 0xC1 and
	// everything from 0xF5 up can never start a valid UTF-8 sequence.
	$lead = $b[0];
	if ($lead < 0xC2 || $lead > 0xF4) {
		return 0;
	}
	$width = ($lead < 0xE0) ? 2 : (($lead < 0xF0) ? 3 : 4);

	// All remaining bytes of the sequence must be continuation bytes.
	for ($i = 1; $i < $width; $i++) {
		if ($b[$i] < 0x80 || $b[$i] > 0xBF) {
			return 0;
		}
	}

	// Extra restrictions on the second byte (RFC 3629).
	if (($lead === 0xE0 && $b[1] < 0xA0)     // overlong 3 byte form
		|| ($lead === 0xED && $b[1] > 0x9F)  // UTF-16 surrogate range
		|| ($lead === 0xF0 && $b[1] < 0x90)  // overlong 4 byte form
		|| ($lead === 0xF4 && $b[1] > 0x8F)  // beyond U+10FFFF
	) {
		return 0;
	}

	// The byte after the sequence may be none, ASCII or a new lead byte.
	$next = $b[$width];
	if ($next >= 0x80 && ($next < 0xC2 || $next > 0xF4)) {
		return 0;
	}

	return $width;
}
209
/**
 * Turn a byte string that mixes UTF-8 sequences and single byte code page
 * characters into a UTF-8 string.
 *
 * Every byte in the 0x80-0xFF range is examined: when it starts a byte
 * sequence that isUtf8Sequence() accepts, that sequence is copied as is,
 * otherwise the byte is looked up in the translation map of the given
 * code page. Supported encodings are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * @param string $str The string with mixed UTF-8 and 8-bit code page
 *    characters.
 * @param string $encoding The code page to use for bytes that are not
 *    part of a UTF-8 sequence.
 *
 * @return string The converted string.
 *
 * @throws \Scrivo\SystemException If the encoding is not supported.
 */
private function fixCodePageString($str, $encoding) {

	// Only the two code pages listed above are accepted.
	if (!($encoding == self::ENC_ISO_8859_1
			|| $encoding == self::ENC_CP_1251)) {
		throw
			new \Scrivo\SystemException("Unsupported encoding: $encoding");
	}

	// Cut the input at every high-bit byte; each chunk carries its own
	// byte offset so the byte that caused the cut sits at offset-1.
	$chunks =
		preg_split('/[\x80-\xFF]/', $str, -1, PREG_SPLIT_OFFSET_CAPTURE);
	$total = count($chunks);

	// A single chunk means no high-bit bytes: nothing to convert.
	if ($total <= 1) {
		return $str;
	}

	$result = $chunks[0][0];
	$idx = 1;
	while ($idx < $total) {
		// Look at (at most) six bytes starting at the high-bit byte.
		$window = substr($str, $chunks[$idx][1] - 1, 6);
		$width = $this->isUtf8Sequence($window);
		if ($width) {
			// A UTF-8 sequence: copy it and skip the (empty) chunks that
			// were produced by its continuation bytes.
			$char = substr($window, 0, $width);
			$idx += ($width - 1);
		} else {
			// A lone high-bit byte: translate it via the code page map.
			$char = self::$maps[$encoding][ord($window[0])];
		}
		// Append the character and the plain text that follows it.
		$result .= $char.$chunks[$idx][0];
		$idx++;
	}

	return $result;
}
270
/**
 * Convert a string with HTML entities, UTF-8 and code page characters
 * to a UTF-8 string.
 *
 * If an encoding other than "UTF-8" is given, bytes in the 0x80-0xFF
 * range that are not part of a UTF-8 sequence are converted using that
 * code page (see fixCodePageString()). Supported encodings are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * You can opt to convert HTML entities in the string to their
 * corresponding characters. Possible choices are:
 *
 * * \Scrivo\String::DECODE_NONE, don't decode HTML entities;
 * * \Scrivo\String::DECODE_ALL, decode all HTML entities;
 * * \Scrivo\String::DECODE_UNRESERVED, decode all but the HTML entities
 *   for the reserved characters < > & " and '.
 *
 * @param string $str The source string, a possible mixture of HTML
 *    entities, UTF-8 and code page characters.
 * @param int $toDecode Which entities to decode, one of the
 *    \Scrivo\String::DECODE_* constants.
 * @param string $encoding The encoding to use when converting 8-bit code
 *    page characters to UTF-8.
 *
 * @return string The converted string.
 */
private function fixString($str, $toDecode=self::DECODE_NONE,
		$encoding="UTF-8") {

	// HTML entities we want to keep. The first row holds the canonical
	// spelling that is written back after decoding.
	// NOTE(review): the entity spellings were lost in the copy of the
	// source this was restored from (they had been decoded to the raw
	// characters). The first row follows from the '#039' marker below;
	// the decimal and hexadecimal rows are a reconstruction - verify
	// them against the upstream repository.
	$reserved = array(
		"&lt;", "&gt;", "&amp;", "&quot;", "&#039;",
		"&#60;", "&#62;", "&#38;", "&#34;", "&#39;",
		"&#x3C;", "&#x3E;", "&#x26;", "&#x22;", "&#x27;"
	);

	// Markers that temporarily replace the entities we want to keep, so
	// html_entity_decode will leave them alone.
	$save = array(
		"#@lt!;", "#@gt!;", "#@amp!;", "#@quot!;", "#*@#039!;",
		"#@lt!;", "#@gt!;", "#@amp!;", "#@quot!;", "#*@#039!;",
		"#@lt!;", "#@gt!;", "#@amp!;", "#@quot!;", "#*@#039!;"
	);

	$keepReserved = ($toDecode == self::DECODE_UNRESERVED);

	if ($keepReserved) {
		// 'Save' entities for reserved characters.
		$str = str_replace($reserved, $save, $str);
	}
	if ($encoding != "UTF-8") {
		// Fix characters that are not properly UTF-8 encoded.
		$str = $this->fixCodePageString($str, $encoding);
	}
	if ($toDecode != self::DECODE_NONE) {
		// Change all entities to their corresponding UTF-8 characters.
		$str = html_entity_decode($str, ENT_QUOTES, "UTF-8");
	}
	if ($keepReserved) {
		// 'Restore' the saved entities, always in canonical spelling.
		$str = str_replace(array_slice($save, 0, 5),
			array_slice($reserved, 0, 5), $str);
	}

	return $str;
}
337
/**
 * Cut a portion out of this string without any bounds checking.
 *
 * The arguments are handed to mb_substr() unchanged, so callers are
 * responsible for validating them first.
 *
 * @param int $start Start offset (in characters) for the substring, use
 *    a negative number to use an offset from the end of the string.
 * @param int $length The length of the substring in characters.
 *
 * @return \Scrivo\String The requested portion of this string.
 */
private function unsafeSubstr($start, $length) {
	$part = mb_substr($this->str, $start, $length, "UTF-8");
	return new \Scrivo\String($part);
}
350
/**
 * Construct a \Scrivo\String.
 *
 * A \Scrivo\String is either created from a string that already is valid
 * UTF-8 (the default, the data is stored untouched) or from a string
 * that may contain HTML entities and/or 8-bit code page characters. In
 * the latter case use the $toDecode and/or $encoding parameters.
 *
 * Possible choices for $toDecode are:
 *
 * * \Scrivo\String::DECODE_NONE, don't decode HTML entities;
 * * \Scrivo\String::DECODE_ALL, decode all HTML entities;
 * * \Scrivo\String::DECODE_UNRESERVED, decode all but the HTML entities
 *   for the reserved characters < > & " and '.
 *
 * Supported values for $encoding, besides the default "UTF-8", are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * Note: typical use of the $toDecode and $encoding parameters is when
 * you want to 'sanitize' data before you store it into a database. These
 * parameters trigger CPU intensive procedures, so it's best not to use
 * them in bulk operations. Once all data is safely stored as UTF-8 there
 * is no need to 'sanitize' it again before displaying.
 *
 * @param string $str The source string, a possible mixture of HTML
 *    entities, UTF-8 and code page characters.
 * @param int $toDecode Which entities to decode.
 * @param string $encoding The encoding to use when converting 8-bit code
 *    page characters to UTF-8.
 */
public function __construct($str="", $toDecode=self::DECODE_NONE,
		$encoding="UTF-8") {
	$raw = (string)$str;
	// Only take the (expensive) repair route when it was asked for.
	if ($toDecode != self::DECODE_NONE || $encoding != "UTF-8") {
		$raw = $this->fixString($raw, $toDecode, $encoding);
	}
	$this->str = $raw;
	$this->pos = 0;
}
392
/**
 * Factory method to construct a \Scrivo\String, or an array of them.
 *
 * When an array is given each element is converted (recursively, so
 * nested arrays are converted too) and an array with the same keys is
 * returned.
 *
 * @see \Scrivo\String::__construct()
 *
 * @param string|array $str The string (or array of strings) to create
 *    the wrapper(s) for. It is assumed that this is valid UTF-8 data. If
 *    this is not the case, you'll need to set the additional parameters.
 * @param int $toDecode Which entities to decode.
 * @param string $encoding The encoding to use when converting 8-bit code
 *    page characters to UTF-8.
 *
 * @return \Scrivo\String|array A \Scrivo\String wrapper object, or an
 *    array of wrapper objects if an array was given.
 */
public static function create($str="", $toDecode=self::DECODE_NONE,
		$encoding="UTF-8") {
	if (!is_array($str)) {
		return new \Scrivo\String($str, $toDecode, $encoding);
	}
	$wrapped = array();
	foreach ($str as $key => $value) {
		$wrapped[$key] = self::create($value, $toDecode, $encoding);
	}
	return $wrapped;
}
416
/**
 * Get the collator that is shared by all \Scrivo\String instances.
 *
 * If no collator was set yet, one is created for the default locale
 * (\Locale::getDefault()) and kept for later calls.
 *
 * @return \Collator The currently set collator for the \Scrivo\String
 *    class.
 */
public static function getCollator() {
	if (self::$coll) {
		return self::$coll;
	}
	self::$coll = new \Collator(\Locale::getDefault());
	return self::$coll;
}
429
/**
 * Replace the collator that is shared by all \Scrivo\String instances.
 *
 * @param \Collator $coll The collator to use from now on.
 */
public static function setCollator(\Collator $coll) {
	self::$coll = $coll;
}
438
/**
 * Implementation of the read-only properties 'length' and 'collator'
 * using the PHP magic method __get().
 *
 * @param string $name The name of the property to get.
 *
 * @return mixed The value of the requested property.
 *
 * @throws \Scrivo\SystemException If the property does not exist.
 */
public function __get($name) {
	if ($name == "length") {
		return $this->getLength();
	}
	if ($name == "collator") {
		return self::getCollator();
	}
	throw new \Scrivo\SystemException("No such property '$name'.");
}
454
/**
 * Convert this object to a PHP string.
 *
 * @return string The primitive UTF-8 string wrapped by this instance.
 */
public function __toString() {
	return $this->str;
}
463
/**
 * Test if this string equals another \Scrivo\String object.
 *
 * When you want to test \Scrivo\String objects for equality, use this
 * method and never the equality operator (==): that compares the objects
 * and therefore all data members of \Scrivo\String, which can give you
 * other results.
 *
 * The wrapped strings are compared with the identity operator (===).
 * A loose comparison (==) would compare numeric strings by value, so
 * that for instance "1e3" and "1000" would be considered equal.
 *
 * @param \Scrivo\String $str The string to compare this string to.
 *
 * @return boolean True if the given string equals this string.
 */
public function equals(\Scrivo\String $str) {
	return (string)$this->str === (string)$str;
}
480
/**
 * Get the length of the string.
 *
 * The length is determined on first use and remembered afterwards.
 *
 * @return int The length of the string in characters (not bytes).
 */
public function getLength() {
	if ($this->len != -1) {
		// Already determined by an earlier call.
		return $this->len;
	}
	$this->len = mb_strlen($this->str, "UTF-8");
	return $this->len;
}
492
/**
 * Return the character count of the string.
 *
 * Part of the implementation of Countable; simply forwards to
 * getLength().
 *
 * @return int The length of the string in characters.
 */
public function count() {
	$length = $this->getLength();
	return $length;
}
504
/**
 * Return the current UTF-8 character when iterating.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from another context. No bounds check is done
 * here: the iterator calls valid() before current().
 *
 * @return \Scrivo\String The character at the current iterator
 *    position, wrapped in a new \Scrivo\String.
 */
public function current() {
	$index = $this->pos;
	return $this->unsafeSubstr($index, 1);
}
518
/**
 * Return the index of the current UTF-8 character when iterating.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from another context.
 *
 * @return int The current iterator position (a character index).
 */
public function key() {
	return $this->pos;
}
531
/**
 * Advance the iterator position to the next UTF-8 character.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from another context.
 */
public function next() {
	$this->pos += 1;
}
541
/**
 * Move the iterator position back to the first character so iterating
 * will (re)start at the beginning of this string.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from another context.
 */
public function rewind() {
	$this->pos = 0;
}
552
/**
 * Check if the current iterator position points at a character.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from another context.
 *
 * @return boolean True if the current character index is valid else false.
 */
public function valid() {
	if (!($this->pos >= 0)) {
		return false;
	}
	return $this->pos < $this->getLength();
}
564
/**
 * Illegal method: set a character at a specified index location.
 *
 * This method only exists because ArrayAccess requires it.
 * \Scrivo\Strings are immutable, so assigning to an index is not
 * allowed and this method always throws.
 *
 * @param int $offset Ignored.
 * @param string $value Ignored.
 *
 * @throws \Scrivo\SystemException Always.
 */
public function offsetSet($offset, $value) {
	$message = "offsetSet can't be called on \Scrivo\String objects";
	throw new \Scrivo\SystemException($message);
}
582
/**
 * Get a UTF-8 character from a string using array brackets.
 *
 * Note that this method is part of the implementation of ArrayAccess and
 * should not be called from another context.
 *
 * @param int $offset A character offset in the string.
 *
 * @return \Scrivo\String The character at the given offset, wrapped in
 *    a new \Scrivo\String.
 *
 * @throws \Scrivo\SystemException If the requested offset was out of range.
 */
public function offsetGet($offset) {
	if ($this->offsetExists($offset)) {
		return $this->unsafeSubstr($offset, 1);
	}
	throw new \Scrivo\SystemException(
		"String index [$offset] out of bounds");
}
600
/**
 * Check if the specified index location in this string is valid.
 *
 * An index is valid when it is not negative and smaller than the length
 * of the string in characters.
 *
 * Note that this method is part of the implementation of ArrayAccess and
 * should not be called from another context.
 *
 * @param int $offset A character offset in the string.
 *
 * @return boolean True if the specified index is within the valid range.
 */
public function offsetExists($offset) {
	if (!($offset >= 0)) {
		return false;
	}
	return $offset < $this->getLength();
}
614
/**
 * Illegal method: unset a character at a specified index location.
 *
 * This method only exists because ArrayAccess requires it.
 * \Scrivo\Strings are immutable, so removing a character by index is
 * not allowed and this method always throws.
 *
 * @param int $offset Ignored.
 *
 * @throws \Scrivo\SystemException Always.
 */
public function offsetUnset($offset) {
	$message = "offsetUnset can't be called on \Scrivo\String objects";
	throw new \Scrivo\SystemException($message);
}
631
/**
 * Get a substring from a string using an offset and a length.
 *
 * Just like PHP's native substr function this method returns a substring
 * from this string using an offset and a length. But note that this
 * method will throw an exception if the offset is invalid.
 *
 * Valid offsets are 0 up to (but not including) the length of the string
 * and, counted from the end of the string, -1 down to and including
 * minus the length of the string (which denotes the first character).
 *
 * @param int $start Start offset for the substring, use a negative number
 *    to use an offset from the end of the string.
 * @param int $length The length of the substring. If omitted the
 *    substring runs up to the end of the string.
 *
 * @return \Scrivo\String The portion of this string specified by the $start
 *    and $length parameter.
 *
 * @throws \Scrivo\SystemException if the requested offset was out of range.
 */
public function substr($start, $length=0xFFFF) {
	$total = $this->getLength();

	// A negative start counts back from the end, so -$total still
	// addresses the first character; a positive start must be < $total.
	$outOfBounds = ($start < 0) ? (-$start > $total) : ($start >= $total);
	if ($outOfBounds) {
		throw new \Scrivo\SystemException(
			"String index [$start] out of bounds");
	}

	// The declared default (0xFFFF) would cut off longer strings, so when
	// no length was passed at all use the full string length instead.
	if (func_num_args() < 2) {
		$length = $total;
	}

	return $this->unsafeSubstr($start, $length);
}
656
/**
 * Get a substring from a string using a start and end index.
 *
 * This method is inspired by its Java counterpart and returns the
 * characters from index $start up to, but not including, index $end.
 * Consequently $end may be equal to the length of the string (to include
 * the last character) and $start may be equal to $end (which yields an
 * empty string).
 *
 * @param int $start Start offset for the substring (inclusive).
 * @param int $end The end offset for the substring (exclusive).
 *
 * @return \Scrivo\String The portion of this string specified by the $start
 *    and $end parameter.
 *
 * @throws \Scrivo\SystemException if not 0 <= $start <= $end <= length.
 */
public function substring($start, $end) {
	// $end is exclusive, so it is allowed to point just past the last
	// character; requiring it to be an existing index would make the
	// last character unreachable.
	if ($start < 0 || $start > $end || $end > $this->getLength()) {
		throw new \Scrivo\SystemException(
			"String index [$start, $end] out of bounds");
	}
	return $this->unsafeSubstr($start, $end-$start);
}
679
/**
 * Get a trimmed copy of this string.
 *
 * Returns a copy of the string, with leading and trailing whitespace
 * removed. Whitespace characters are everything matched by \s (such as
 * ' ', \t, \r, \n) and the non-breaking space (U+00A0).
 *
 * @return \Scrivo\String A copy of this string with leading and trailing
 *    white space removed.
 */
public function trim() {
	// The non-breaking space is written as an escape: as a literal it is
	// indistinguishable from an ordinary space in most editors and gets
	// lost easily when the file is copied or re-encoded.
	$trimmed = preg_replace(
		'/(^[\s\x{00A0}]+)|([\s\x{00A0}]+$)/us', "", $this->str);

	// preg_replace yields NULL on failure (for instance when the subject
	// is not valid UTF-8); keep the data as it is rather than silently
	// returning an empty string.
	if ($trimmed === null) {
		$trimmed = $this->str;
	}

	return new \Scrivo\String($trimmed);
}
694
/**
 * Check if the string contains the given substring.
 *
 * This is the test you normally use strpos(...) !== false for.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset A character offset from where to start the search.
 * @param boolean $ignoreCase Set to perform a case insensitive lookup.
 *
 * @return boolean True if the given string is contained by this string.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function contains(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
	if ($offset && !$this->offsetExists($offset)) {
		throw new \Scrivo\SystemException(
			"String index [$offset] out of bounds");
	}

	$needle = (string)$str;

	// $offset is a character offset, so the multi-byte functions are
	// needed in both cases: the byte based strpos would start searching
	// at the wrong position as soon as multi-byte characters precede it.
	$found = $ignoreCase
		? mb_stripos($this->str, $needle, $offset, "UTF-8")
		: mb_strpos($this->str, $needle, $offset, "UTF-8");

	return $found !== false;
}
721
/**
 * Returns the index of the given substring in this string.
 *
 * Comparable to PHP's native strpos and stripos functions, with two
 * important differences: this method returns -1 (and not false) if the
 * substring was not found, and it raises an exception if the given
 * offset is out of range.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset A character offset from where to start the search.
 * @param boolean $ignoreCase Set to perform a case insensitive lookup.
 *
 * @return int The character index of the first occurrence of the
 *    substring at or after $offset, or -1 if the substring was not found.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
	if ($offset && !$this->offsetExists($offset)) {
		throw new \Scrivo\SystemException(
			"String index [$offset] out of bounds");
	}

	$position = $ignoreCase
		? mb_stripos($this->str, $str, $offset, "UTF-8")
		: mb_strpos($this->str, $str, $offset, "UTF-8");

	if ($position === false) {
		return -1;
	}
	return $position;
}
753
/**
 * Returns the index of the last occurrence of the given substring in this
 * string.
 *
 * Comparable to PHP's native strrpos and strripos functions, with two
 * important differences: this method returns -1 (and not false) if the
 * substring was not found, and it raises an exception if the given
 * offset is out of range. A negative offset, to indicate an offset
 * measured from the end of the string, is allowed.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset An offset from where to start the search. A positive
 *    value indicates an offset measured from the start of the string, a
 *    negative value from the end of the string.
 * @param boolean $ignoreCase Perform a case insensitive lookup.
 *
 * @return int The character index of the last occurrence of the
 *    substring, or -1 if the substring was not found.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
	// A non-zero offset is validated by its magnitude, so that negative
	// offsets are checked against the same bounds as positive ones.
	if ($offset
			&& !$this->offsetExists($offset < 1 ? -$offset : $offset)) {
		throw new \Scrivo\SystemException(
			"String index [$offset] out of bounds");
	}

	$position = $ignoreCase
		? mb_strripos($this->str, $str, $offset, "UTF-8")
		: mb_strrpos($this->str, $str, $offset, "UTF-8");

	if ($position === false) {
		return -1;
	}
	return $position;
}
793
794	/**
795	* Returns the first occurance of a given substring in this string.
796	*
797	* Just like the PHP's native strstr and stristr functions this method
798	* returns the substring of this string that start with the first occurance
799	* of the given a substring in this string. Note that this method throws
800	* an exception if an empty string was given as search string and not
801	* a warning as strstr does.
802	*
803	* @param \Scrivo\String $str The string to search for.
804	* @param int $part Flag to indicate to return the part of the string
805	* before the first occurance of the given substring i.o. the part
806	* after the substring.
807	* @param boolean $ignoreCase Perform an case insensitive lookup.
808	*
809	* @return \Scrivo\String The substring plus the part of the string after
810	* the first occurrence of the substring, or the part of the string before
811	* the first occurrence of the substring (excluding the substring) or NULL
812	* if not found.
813	*
814	* @throws \Scrivo\SystemException If an empty search string was given.
815	*/
public function firstOccurranceOf(\Scrivo\String $str, $part=false,
		$ignoreCase=false) {
	// Reject an empty search string up front.
	if (!$str->getLength()) {
		throw new \Scrivo\SystemException(
			"firstOccurranceOf requires a search string");
	}

	// Pick the case insensitive or case sensitive multibyte search.
	$found = $ignoreCase
		? mb_stristr($this->str, $str, $part, "UTF-8")
		: mb_strstr($this->str, $str, $part, "UTF-8");

	// The mb_* functions report "not found" as false; this method uses NULL.
	if ($found === false) {
		return NULL;
	}
	return new \Scrivo\String($found);
}
830
831	/**
832	* Returns the last occurrence of a given character in this string.
833	*
834	* Just like PHP's strrchr and mb_strrichr functions this method
835	* returns the substring of this string that starts with the last
836	* occurrence of the given character in this string. Note that this method
837	* throws an exception if the search string is not exactly one character
838	* long (this includes an empty string) and not a warning as strstr does.
839	*
840	* @param \Scrivo\String $str The character to search for.
841	* @param boolean $part Flag to indicate to return part of the string
842	* before the last occurrence of the given character instead of the part
843	* after the character.
844	* @param boolean $ignoreCase Perform a case insensitive lookup.
845	*
846	* @return \Scrivo\String The substring plus the part of the string after
847	* the last occurrence of the character, or the part of the string before
848	* the last occurrence of the character (excluding the character) or NULL
849	* if not found.
850	*
851	* @throws \Scrivo\SystemException If a search string of not exactly one
852	* character in length was given.
853	*/
public function lastOccurranceOf(\Scrivo\String $str, $part=false,
		$ignoreCase=false) {
	// Only a search string of exactly one character is accepted.
	if ($str->getLength() != 1) {
		throw new \Scrivo\SystemException(
			"lastOccurranceOf accepts single charaters only");
	}

	// Pick the case insensitive or case sensitive multibyte search.
	$found = $ignoreCase
		? mb_strrichr($this->str, $str, $part, "UTF-8")
		: mb_strrchr($this->str, $str, $part, "UTF-8");

	// The mb_* functions report "not found" as false; this method uses NULL.
	if ($found === false) {
		return NULL;
	}
	return new \Scrivo\String($found);
}
868
869	/**
870	* Replace a substring or set of substrings in this string.
871	*
872	* You can use this method in favour of PHP's native str_replace and strtr
873	* functions. This method will do proper type checking for you.
874	*
875	* @param \Scrivo\String|\Scrivo\String[] $from A (set of) string(s) to replace
876	* in this string.
877	* @param \Scrivo\String|\Scrivo\String[] $to A (set of) replacement string(s) to
878	* replace the found string(s).
879	*
880	* @return \Scrivo\String A string with the replaced values.
881	*
882	* @throws \Scrivo\SystemException If the input data is not of type
883	* \Scrivo\String or \Scrivo\String[], or if the $to parameter is an array
884	* and $from isn't or hasn't the same number of elements.
885	*/
public function replace($from, $to) {
	$fromIsArray = is_array($from);
	$toIsString = $to instanceof \Scrivo\String;

	if ($fromIsArray && $toIsString) {
		// Many search strings, one replacement: check every search string.
		foreach ($from as $k=>$v) {
			if (!($v instanceof \Scrivo\String)) {
				throw new \Scrivo\SystemException("From element is"
					. " not an \Scrivo\String as array position [$k]");
			}
		}
	} else if ($fromIsArray && is_array($to)) {
		// Many search strings, many replacements: the arrays must have the
		// same size and contain only \Scrivo\String objects.
		if (count($from) != count($to)) {
			throw new \Scrivo\SystemException(
				"Input arrays are not the same size");
		}
		foreach ($from as $k=>$v) {
			if (!($v instanceof \Scrivo\String
					&& $to[$k] instanceof \Scrivo\String)) {
				throw new \Scrivo\SystemException("To or from element is"
					. " not an \Scrivo\String as array position [$k]");
			}
		}
	} else if (!($from instanceof \Scrivo\String && $toIsString)) {
		// The only combination left that is valid is string to string.
		throw new \Scrivo\SystemException("Invalid argument types");
	}

	// All accepted combinations are replaced by the same native call.
	return new \Scrivo\String(str_replace($from, $to, $this->str));
}
913
914	/**
915	* Split this string using a delimiter.
916	*
917	* Just like PHP's native explode this method splits a string on
918	* boundaries formed by the string delimiter. Note that the behaviour
919	* of the limit parameter is a little bit different and that this method
920	* will throw an exception if an empty string is passed as a delimiter.
921	*
922	* @param \Scrivo\String $delimiter The boundary string.
923	* @param int $limit If limit is set and positive, the returned array
924	* will contain a maximum of limit elements with the last element
925	* containing the rest of string. If the limit parameter is negative,
926	* all components except the last -limit are returned. If the limit is
927	* not set or 0 no limit will be used.
928	*
929	* @return \Scrivo\String[] An array of strings created by splitting the
930	* string parameter on boundaries formed by the delimiter. If the
931	* delimiter was not found an array containing a copy of this string
932	* will be returned except if limit was negative, in that case an
933	* empty array will be returned.
934	*
935	* @throws \Scrivo\SystemException If an empty delimiter was given.
936	*/
public function split(\Scrivo\String $delimiter, $limit=0) {
	// An empty delimiter is rejected.
	if ($delimiter == "") {
		throw new \Scrivo\SystemException(
			"split cannot use an empty \"\" delimiter.");
	}

	// A limit of 0 means "no limit": explode is then called without one.
	if ($limit) {
		$parts = explode($delimiter, $this->str, $limit);
	} else {
		$parts = explode($delimiter, $this->str);
	}

	// Wrap each native string in a \Scrivo\String, keeping the keys.
	$result = array();
	foreach ($parts as $index=>$part) {
		$result[$index] = new \Scrivo\String($part);
	}
	return $result;
}
949
950	/**
951	* Get a copy of this string with all of its characters converted to lower
952	* case.
953	*
954	* @return \Scrivo\String A string containing only lower case characters.
955	*/
public function toLowerCase() {
	// Multibyte-aware lower casing of the wrapped UTF-8 string.
	$lowered = mb_strtolower($this->str, "UTF-8");
	return new \Scrivo\String($lowered);
}
959
960	/**
961	* Get a copy of this string with all of its characters converted to upper
962	* case.
963	*
964	* @return \Scrivo\String A string containing only upper case characters.
965	*/
public function toUpperCase() {
	// Multibyte-aware upper casing of the wrapped UTF-8 string.
	$uppered = mb_strtoupper($this->str, "UTF-8");
	return new \Scrivo\String($uppered);
}
969
970	/**
971	* Compare this string to another \Scrivo\String object.
972	*
973	* Note that this method requires the \Scrivo\String collator to be set,
974	* else the method falls back to the default locale for creating a
975	* collator and generates a warning.
976	*
977	* @param \Scrivo\String $str The string to compare this string to.
978	*
979	* @return int Less than 0 if this string is less than the given
980	* string $str; more than 0 if this string is greater than $str, and
981	* 0 if they are equal.
982	*/
public function compareTo(\Scrivo\String $str) {
	// Comparison is delegated to the collator returned by getCollator().
	$collator = self::getCollator();
	return $collator->compare($this->str, $str);
}
986
987	/**
988	* Check if this string exists in an array of \Scrivo\String-s.
989	*
990	* @param \Scrivo\String[] $arr The array to search.
991	*
992	* @return mixed If found the key of the first occurrence of the string
993	* in the array, else null.
994	*/
public function inArray($arr) {
	foreach ($arr as $k=>$v) {
		// An element that is not a \Scrivo\String can never be equal to
		// this string. Checking the type first prevents calling equals()
		// on such an element, so it is treated as "not equal" instead.
		if ($v instanceof \Scrivo\String && $v->equals($this)) {
			// Return the key of the first match.
			return $k;
		}
	}
	// No element matched.
	return null;
}
1003	}
1004