-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStringFunction.php
389 lines (351 loc) · 15.2 KB
/
StringFunction.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
<?php
/**
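* Static string helpers: HTML entity encoding, UTF-8 detection, accent removal,
* slug generation, HTML-aware truncation and URL-safe base64.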
*
* Void Function (c) 2010
*/
namespace Void;
class StringFunction
{
// Lower-case names of HTML elements that are written without a closing tag.
// Read by truncateHtml() to decide whether an opening tag must be closed later.
protected static $selfClosingTag = array("area","base","br","col","command","embed","hr","img","input","keygen","link","meta","param","source","track","wbr");
/**
 * Convert applicable characters to HTML entities, defaulting the charset to UTF-8.
 *
 * A null $quoteStyle is treated as 0 (ENT_NOQUOTES) and a null $double_encode as
 * false. That is exactly what PHP's implicit null-to-scalar coercion produced
 * before; the casts make it explicit and avoid the PHP 8.1+ "passing null to
 * parameter" deprecation raised by htmlentities().
 *
 * @param string|null $string        Text to encode; null is encoded as an empty string.
 * @param int|null    $quoteStyle    ENT_* flag bitmask, or null for ENT_NOQUOTES.
 * @param string      $charset       Character set of $string.
 * @param bool|null   $double_encode Whether existing entities are encoded again; null means false.
 *
 * @return string
 */
public static function htmlentities_utf8($string, $quoteStyle = null, $charset = 'UTF-8', $double_encode = null)
{
    $text = ($string === null) ? '' : $string;

    return htmlentities($text, (int) $quoteStyle, $charset, (bool) $double_encode);
}
/**
 * Alias of htmlentities_utf8(); forwards all four arguments unchanged.
 *
 * @param string    $string
 * @param int|null  $quoteStyle
 * @param string    $charset
 * @param bool|null $double_encode
 *
 * @return string
 */
public static function utf8_htmlentities($string, $quoteStyle = null, $charset = 'UTF-8', $double_encode = null)
{
    $encoded = self::htmlentities_utf8($string, $quoteStyle, $charset, $double_encode);

    return $encoded;
}
// TODO: rename to dateToString and move to \Infc\Date.
/**
 * Format a date as a long, human-readable string via strftime().
 *
 * The day-month-year order ("%e %B %Y") is used when the current LC_TIME
 * locale name contains "fr"; otherwise the month-day-year order
 * ("%B %e, %Y") is used. The result is returned as UTF-8.
 *
 * NOTE(review): strftime() is deprecated as of PHP 8.1; it is kept here because
 * the output depends on the process locale, which callers may rely on.
 *
 * @param int|float|string $date   Unix timestamp, or any string strtotime() understands.
 * @param string           $format Unused; kept for backward compatibility.
 * @param string           $lang   Unused; kept for backward compatibility.
 *
 * @return string The formatted date, or '' when $date cannot be interpreted.
 */
public static function toTime($date, $format = "", $lang = "")
{
    if (is_string($date)) {
        $date = strtotime($date);
    }
    // strtotime() yields false on failure, which is caught here too.
    if (!is_numeric($date)) {
        return '';
    }

    // The previous version read $GLOBALS["lang"] into $lang here, but the value
    // was never used and the read raised a warning when the global was not set.

    // Passing "0" only queries the current locale, it does not change it.
    $currentLocale = (string) setlocale(LC_TIME, '0');
    if (strpos($currentLocale, 'fr') !== false) {
        $return = strftime("%e %B %Y", (int) $date);
    } else {
        $return = strftime("%B %e, %Y", (int) $date);
    }

    if (self::isUTF8($return)) {
        return $return;
    }
    // Same conversion utf8_encode() performed (ISO-8859-1 to UTF-8), without
    // relying on a function deprecated in PHP 8.2.
    return mb_convert_encoding($return, 'UTF-8', 'ISO-8859-1');
}
/**
 * Check whether $string contains at least one well-formed multi-byte UTF-8 sequence.
 *
 * This is a "contains" test, not a full validation: the pattern is not
 * anchored, so a string made only of ASCII bytes yields 0, and a string mixing
 * valid sequences with stray bytes still yields 1.
 *
 * @param string $string
 *
 * @return int|false The raw preg_match() result (1, 0, or false on regex failure).
 */
public static function isUTF8($string)
{
    $multiByteSequence = '%(?:
[\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|\xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|\xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|\xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|[\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|\xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)+%xs';

    return preg_match($multiByteSequence, $string);
}
/**
 * Check that every byte of $str is either ASCII or part of a structurally
 * complete UTF-8 sequence (lead byte followed by the right number of
 * continuation bytes).
 *
 * Only the byte structure is verified; overlong encodings and code points
 * above U+10FFFF are not rejected.
 *
 * @param string $str
 *
 * @return bool
 */
public static function canBeConvertedToUtf8($str)
{
    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);
        // ASCII is 0..127. The previous test ($c > 128) let a lone 0x80
        // continuation byte slip through as if it were ASCII.
        if ($c <= 127) {
            continue;
        }

        if ($c > 247) {
            return false;               // 0xF8..0xFF never start a sequence
        } elseif ($c > 239) {
            $bytes = 4;                 // 0xF0..0xF7
        } elseif ($c > 223) {
            $bytes = 3;                 // 0xE0..0xEF
        } elseif ($c > 191) {
            $bytes = 2;                 // 0xC0..0xDF
        } else {
            return false;               // 0x80..0xBF: continuation byte without a lead
        }

        // The sequence must fit inside the string.
        if (($i + $bytes) > $len) {
            return false;
        }

        // Every trailing byte must be a continuation byte (0x80..0xBF).
        while ($bytes > 1) {
            $i++;
            $b = ord($str[$i]);
            if ($b < 128 || $b > 191) {
                return false;
            }
            $bytes--;
        }
    }
    return true;
}
/**
 * Build the lookup table that maps accented or special characters to plain
 * ASCII replacements.
 *
 * @return array Map of character => ASCII replacement (one or two letters).
 */
public static function getAccentCharacterMap()
{
    $map = array(
        'Š' => 'S', 'Œ' => 'OE', 'Ž' => 'Z',
        'š' => 's', 'œ' => 'oe', 'ž' => 'z',
        'Ÿ' => 'Y', '¥' => 'Y', 'µ' => 'u',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
        'Ç' => 'C',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ð' => 'D', 'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'Ý' => 'Y', 'ß' => 's',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae',
        'ç' => 'c', 'ć' => 'c',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ð' => 'o', 'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
    );

    return $map;
}
/**
 * Replace every character listed in getAccentCharacterMap() with its ASCII
 * replacement.
 *
 * @param string|array $string Subject passed straight to str_replace().
 *
 * @return string|array
 */
public static function removeAccents($string)
{
    $map = self::getAccentCharacterMap();
    $accented = array_keys($map);
    $plain = array_values($map);

    return str_replace($accented, $plain, $string);
}
/**
 * Alias of clean(); forwards both arguments unchanged.
 *
 * @param string       $string
 * @param array|string $exception
 *
 * @return string
 */
public static function get($string, $exception = array())
{
    $cleaned = self::clean($string, $exception);

    return $cleaned;
}
/**
 * Alias of clean(); forwards both arguments unchanged.
 *
 * @param string       $string
 * @param array|string $exception
 *
 * @return string
 */
public static function convertToCleanString($string, $exception = array())
{
    $cleaned = self::clean($string, $exception);

    return $cleaned;
}
/**
 * Turn arbitrary text into a lower-case, dash-separated "clean" string.
 *
 * Steps: strip tags, normalise to UTF-8, decode HTML entities, convert to
 * ISO-8859-1, transliterate accents, replace everything that is not a word
 * character (or listed in $exception) with a space, then turn spaces into
 * dashes and collapse runs of dashes.
 *
 * @param string       $string    Text to clean. An empty or null value is returned as is.
 * @param array|string $exception Regex character-class fragments that must be kept
 *                                in addition to \w. They are inserted verbatim into
 *                                the character class, so they must already be escaped.
 *
 * @return string
 *
 * @throws Exception When nothing is left after cleaning.
 */
public static function clean($string, $exception = array())
{
    if ($string === null || strlen($string) === 0) {
        return $string;
    }
    if (!is_array($exception)) {
        $exception = array($exception);
    }

    $string = strip_tags($string);

    // No valid multi-byte sequence found: treat the input as ISO-8859-1.
    // The source encoding is given explicitly; without it mb_convert_encoding()
    // converts from the internal encoding (UTF-8 by default), which turned every
    // Latin-1 accented byte into "?" instead of transcoding it.
    if (!self::isUTF8($string)) {
        $string = mb_convert_encoding($string, 'UTF-8', 'ISO-8859-1');
    }

    // Entities are decoded as UTF-8 first because some of them (e.g. the
    // ellipsis) have no ISO-8859-1 equivalent; only then is the text converted.
    $string = html_entity_decode($string, 0, 'UTF-8');
    $string = mb_convert_encoding($string, 'ISO-8859-1', 'UTF-8');

    $replacePairs = self::getAccentCharacterMap();
    // En dash and em dash become a plain hyphen; underscores become spaces.
    $replacePairs['–'] = '-';
    $replacePairs['—'] = '-';
    $replacePairs['_'] = ' ';
    // Add an ISO-8859-1 encoded twin of every key, since $string is now
    // ISO-8859-1. foreach iterates over a copy, so appending here is safe.
    foreach ($replacePairs as $key => $val) {
        $replacePairs[mb_convert_encoding($key, 'ISO-8859-1', 'UTF-8')] = $val;
    }

    $exception[] = '\w';

    $transliterated = str_replace(array_keys($replacePairs), array_values($replacePairs), $string);
    // Parentheses are the regex delimiters here; the pattern is a single negated class.
    $spaced = trim(preg_replace('([^' . implode('', $exception) . '])', ' ', $transliterated));
    $dashed = strtr($spaced, ' ', '-');
    $clear = strtolower(preg_replace('!-{2,}!', '-', $dashed));

    if (strlen($clear) > 0) {
        return $clear;
    }

    // NOTE(review): inside this namespace the unqualified name resolves to
    // \Void\Exception. Confirm that class exists; otherwise \Exception is
    // probably what was meant.
    throw new Exception("convertToCleanString was not able to create a valid clean name for ({$string})");
}
/**
 * Join array elements with $glue, but use $glueLast before the final element
 * (e.g. "a, b and c").
 *
 * @param string $glue     Separator between all elements except the last two.
 * @param string $glueLast Separator between the last two elements.
 * @param array  $array    Elements to join.
 *
 * @return string Empty string for an empty array; the sole element for a one-element array.
 */
public static function implodeLast($glue, $glueLast, array $array)
{
    $tail = array_pop($array);

    // Nothing left in front of the tail: no separator of either kind is needed.
    if (count($array) === 0) {
        return '' . $tail;
    }

    $head = implode($glue, $array);

    return $head . $glueLast . $tail;
}
/**
 * Return the first part of an HTML fragment, counting only text (tags are not
 * counted) and closing any tags left open by the cut.
 *
 * Entities and, when $isUtf8 is true, multi-byte UTF-8 sequences each count as
 * one character.
 *
 * @param string $html
 * @param int    $maxLength        Maximum number of text characters; 0 returns $html unchanged.
 * @param bool   $cutAtWordBoundry Currently not honoured: the cut is always made exactly at
 *                                 $maxLength. Kept for backward compatibility.
 * @param string $addCharacters    Characters appended after the text and before the closing tags
 *                                 (ex: "..." or "Register to read more"). They are appended
 *                                 whether or not anything was cut.
 * @param bool   $isUtf8           Whether $html is UTF-8 encoded.
 *
 * @return string
 */
public static function truncateHtml($html, $maxLength = 0, $cutAtWordBoundry = true, $addCharacters = '', $isUtf8 = true)
{
    if ($maxLength == 0) {
        return $html;
    }

    $printedLength = 0;
    $position = 0;
    $tags = array();
    $return = '';

    // Tag names may contain digits (h1..h6) and upper-case letters. The former
    // pattern [a-z]+ captured "h" for <h1> (closing it as </h>) and did not
    // recognise upper-case tags at all.
    $tagOrEntity = '</?([a-zA-Z][a-zA-Z0-9-]*)[^>]*>|&#?[a-zA-Z0-9]+;';
    // For UTF-8, we need to count multibyte sequences as one character.
    $re = $isUtf8
        ? '{' . $tagOrEntity . '|[\x80-\xFF][\x80-\xBF]*}'
        : '{' . $tagOrEntity . '}';

    while ($printedLength < $maxLength && preg_match($re, $html, $match, PREG_OFFSET_CAPTURE, $position)) {
        list($tag, $tagPosition) = $match[0];

        // Print text leading up to the tag.
        $str = substr($html, $position, $tagPosition - $position);
        if ($printedLength + strlen($str) > $maxLength) {
            $return .= substr($str, 0, $maxLength - $printedLength);
            $printedLength = $maxLength;
            break;
        }
        $return .= $str;
        $printedLength += strlen($str);
        if ($printedLength >= $maxLength) {
            break;
        }

        if ($tag[0] == '&' || ord($tag) >= 0x80) {
            // Pass the entity or UTF-8 multibyte sequence through unchanged.
            $return .= $tag;
            $printedLength++;
        } else {
            // Handle the tag.
            $tagName = $match[1][0];
            if ($tag[1] == '/') {
                // Closing tag: forget the most recently opened tag.
                array_pop($tags);
                $return .= $tag;
            } elseif ($tag[strlen($tag) - 2] == '/' || in_array(strtolower($tagName), self::$selfClosingTag, true)) {
                // Self-closing tag. Matching on the tag name also covers "<br>",
                // which the former "<name " test (trailing space required) missed,
                // producing a stray "</br>" at the end of the output.
                $return .= $tag;
            } else {
                // Opening tag.
                $return .= $tag;
                $tags[] = $tagName;
            }
        }

        // Continue after the tag.
        $position = $tagPosition + strlen($tag);
    }

    // Print any remaining text.
    if ($printedLength < $maxLength && $position < strlen($html)) {
        $return .= substr($html, $position, $maxLength - $printedLength);
    }

    $return .= $addCharacters;

    // Close any open tags, innermost first.
    while (!empty($tags)) {
        $return .= sprintf('</%s>', array_pop($tags));
    }

    return $return;
}
/**
 * truncateHtml2 truncates a string to a number of characters while optionally
 * preserving whole words and HTML tags.
 *
 * Lengths are measured in bytes (strlen/substr); each HTML entity counts as one.
 *
 * NOTE(review): when $exact is false and $considerHtml is true, the cut is made
 * at the last space of the collected output, which may fall inside a tag's
 * attribute list — confirm whether callers depend on this.
 *
 * @param string  $text         String to truncate.
 * @param integer $length       Length of returned string, including ellipsis.
 * @param string  $ending       Ending to be appended to the trimmed string.
 * @param boolean $exact        If false, $text will not be cut mid-word.
 * @param boolean $considerHtml If true, HTML tags are not counted and open tags are closed.
 *
 * @return string Trimmed string.
 */
public static function truncateHtml2($text, $length = 100, $ending = '...', $exact = false, $considerHtml = true)
{
    // Named, decimal and hexadecimal entities. The hex alternative needs its
    // "&#x" prefix; without it plain text such as "face;" was taken for an entity.
    $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

    if ($considerHtml) {
        // If the plain text is shorter than the maximum length, return the whole text.
        if (strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
            return $text;
        }

        // Split into (optional tag, following text) pairs.
        preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);

        $total_length = strlen($ending);
        $open_tags = array();
        $truncate = '';

        foreach ($lines as $line_matchings) {
            // If there is an html-tag in this pair, handle it and add it (uncounted) to the output.
            if (!empty($line_matchings[1])) {
                if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) {
                    // "Empty element" with or without the xhtml closing slash: nothing to track.
                } elseif (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
                    // Closing tag: remove it from the open list. Opening tags are stored
                    // lower-cased, so the lookup must be lower-cased as well.
                    $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                    if ($pos !== false) {
                        unset($open_tags[$pos]);
                    }
                } elseif (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
                    // Opening tag: put it at the front so tags are closed innermost first.
                    array_unshift($open_tags, strtolower($tag_matchings[1]));
                }
                $truncate .= $line_matchings[1];
            }

            // Length of the plain text part; every entity counts as one character.
            $content_length = strlen(preg_replace($entityPattern, ' ', $line_matchings[2]));

            if ($total_length + $content_length > $length) {
                // The number of characters which are left.
                $left = $length - $total_length;
                $entities_length = 0;

                // Entities inside the remaining range take more bytes than the one
                // character they count for; add their real length to the cut offset.
                if (preg_match_all($entityPattern, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
                    foreach ($entities[0] as $entity) {
                        if ($entity[1] + 1 - $entities_length <= $left) {
                            $left--;
                            $entities_length += strlen($entity[0]);
                        } else {
                            // No more characters left.
                            break;
                        }
                    }
                }

                $truncate .= substr($line_matchings[2], 0, $left + $entities_length);
                // Maximum length is reached, so leave the loop.
                break;
            }

            $truncate .= $line_matchings[2];
            $total_length += $content_length;

            // If the maximum length is reached, leave the loop.
            if ($total_length >= $length) {
                break;
            }
        }
    } else {
        if (strlen($text) <= $length) {
            return $text;
        }
        $truncate = substr($text, 0, $length - strlen($ending));
    }

    // If the words shouldn't be cut in the middle...
    if (!$exact) {
        // ...search the last occurrence of a space...
        $spacepos = strrpos($truncate, ' ');
        // strrpos() returns false when there is no space. isset() was true for
        // false, so the whole text was cut down to an empty string in that case.
        if ($spacepos !== false) {
            // ...and cut the text at this position.
            $truncate = substr($truncate, 0, $spacepos);
        }
    }

    // Add the defined ending to the text.
    $truncate .= $ending;

    if ($considerHtml) {
        // Close all unclosed html-tags.
        foreach ($open_tags as $tag) {
            $truncate .= '</' . $tag . '>';
        }
    }

    return $truncate;
}
/**
 * Replace $search with $replace only when $string contains $search exactly at
 * offset $startPos; otherwise return $string untouched.
 *
 * @param string $search
 * @param string $replace
 * @param string $string
 * @param int    $startPos Offset at which $search must begin.
 *
 * @return string
 */
public static function replaceStart($search, $replace, $string, $startPos = 0)
{
    $needleLength = strlen($search);

    // Guard: nothing to do unless the needle sits right at the requested offset.
    if (substr($string, $startPos, $needleLength) !== $search) {
        return $string;
    }

    $offset = strpos($string, $search, $startPos);

    return substr_replace($string, $replace, $offset, $needleLength);
}
/**
 * Convert a CamelCase identifier to lower-case dashed form
 * (e.g. "StringFunction" becomes "string-function").
 *
 * A dash is inserted after every ASCII letter that is directly followed by an
 * upper-case ASCII letter, so runs of capitals are split letter by letter.
 *
 * @param string $className
 *
 * @return string
 */
public static function camel2dashed($className)
{
    $withDashes = preg_replace('/([a-zA-Z])(?=[A-Z])/', '$1-', $className);

    return strtolower($withDashes);
}
/**
 * Encode $data as URL-safe base64: "+" and "/" are replaced by "-" and "_",
 * and trailing "=" padding is removed.
 *
 * @param string $data
 *
 * @return string
 */
public static function base64url_encode($data)
{
    $standard = base64_encode($data);
    $urlSafe = strtr($standard, '+/', '-_');

    return rtrim($urlSafe, '=');
}
/**
 * Decode a URL-safe base64 string ("-" and "_" alphabet, padding optional).
 *
 * @param string $data
 *
 * @return string|false Decoded bytes, or whatever base64_decode() returns on failure.
 */
public static function base64url_decode($data)
{
    $standard = strtr($data, '-_', '+/');

    // Restore the "=" padding up to a multiple of four characters. The former
    // str_pad() call passed strlen % 4 as the *target length*, which is never
    // longer than the string itself, so no padding was ever added.
    $remainder = strlen($standard) % 4;
    if ($remainder !== 0) {
        $standard .= str_repeat('=', 4 - $remainder);
    }

    return base64_decode($standard);
}
}