* @author Alex Makarov * @since 2.0 */ class BaseStringHelper { /** * Returns the number of bytes in the given string. * This method ensures the string is treated as a byte array by using `mb_strlen()`. * * @param string $string the string being measured for length * @return int the number of bytes in the given string. */ public static function byteLength($string) { return mb_strlen((string)$string, '8bit'); } /** * Returns the portion of string specified by the start and length parameters. * This method ensures the string is treated as a byte array by using `mb_substr()`. * * @param string $string the input string. Must be one character or longer. * @param int $start the starting position * @param int|null $length the desired portion length. If not specified or `null`, there will be * no limit on length i.e. the output will be until the end of the string. * @return string the extracted part of string, or FALSE on failure or an empty string. * @see https://www.php.net/manual/en/function.substr.php */ public static function byteSubstr($string, $start, $length = null) { if ($length === null) { $length = static::byteLength($string); } return mb_substr($string, $start, $length, '8bit'); } /** * Returns the trailing name component of a path. * This method is similar to the php function `basename()` except that it will * treat both \ and / as directory separators, independent of the operating system. * This method was mainly created to work on php namespaces. When working with real * file paths, php's `basename()` should work fine for you. * Note: this method is not aware of the actual filesystem, or path components such as "..". * * @param string $path A path string. * @param string $suffix If the name component ends in suffix this will also be cut off. * @return string the trailing name component of the given path. * @see https://www.php.net/manual/en/function.basename.php */ public static function basename($path, $suffix = '') { $len = mb_strlen($suffix); if ($len > 0 && mb_substr($path, -$len) === $suffix) { $path = mb_substr($path, 0, -$len); } $path = rtrim(str_replace('\\', '/', $path), '/'); $pos = mb_strrpos($path, '/'); if ($pos !== false) { return mb_substr($path, $pos + 1); } return $path; } /** * Returns parent directory's path. * This method is similar to `dirname()` except that it will treat * both \ and / as directory separators, independent of the operating system. * * @param string $path A path string. * @return string the parent directory's path. * @see https://www.php.net/manual/en/function.basename.php */ public static function dirname($path) { $normalizedPath = rtrim( str_replace('\\', '/', $path), '/' ); $separatorPosition = mb_strrpos($normalizedPath, '/'); if ($separatorPosition !== false) { return mb_substr($path, 0, $separatorPosition); } return ''; } /** * Truncates a string to the number of characters specified. * * In order to truncate for an exact length, the $suffix char length must be counted towards the $length. For example * to have a string which is exactly 255 long with $suffix `...` of 3 chars, then `StringHelper::truncate($string, 252, '...')` * must be used to ensure you have 255 long string afterwards. * * @param string $string The string to truncate. * @param int $length How many characters from original string to include into truncated string. * @param string $suffix String to append to the end of truncated string. * @param string|null $encoding The charset to use, defaults to charset currently used by application. * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags. * This parameter is available since version 2.0.1. * @return string the truncated string. */ public static function truncate($string, $length, $suffix = '...', $encoding = null, $asHtml = false) { if ($encoding === null) { $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8'; } if ($asHtml) { return static::truncateHtml($string, $length, $suffix, $encoding); } if (mb_strlen($string, $encoding) > $length) { return rtrim(mb_substr($string, 0, $length, $encoding)) . $suffix; } return $string; } /** * Truncates a string to the number of words specified. * * @param string $string The string to truncate. * @param int $count How many words from original string to include into truncated string. * @param string $suffix String to append to the end of truncated string. * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags. * This parameter is available since version 2.0.1. * @return string the truncated string. */ public static function truncateWords($string, $count, $suffix = '...', $asHtml = false) { if ($asHtml) { return static::truncateHtml($string, $count, $suffix); } $words = preg_split('/(\s+)/u', trim($string), 0, PREG_SPLIT_DELIM_CAPTURE); if (count($words) / 2 > $count) { return implode('', array_slice($words, 0, ($count * 2) - 1)) . $suffix; } return $string; } /** * Truncate a string while preserving the HTML. * * @param string $string The string to truncate * @param int $count The counter * @param string $suffix String to append to the end of the truncated string. * @param string|bool $encoding Encoding flag or charset. * @return string * @since 2.0.1 */ protected static function truncateHtml($string, $count, $suffix, $encoding = false) { $config = \HTMLPurifier_Config::create(null); if (Yii::$app !== null) { $config->set('Cache.SerializerPath', Yii::$app->getRuntimePath()); } $lexer = \HTMLPurifier_Lexer::create($config); $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context()); $openTokens = []; $totalCount = 0; $depth = 0; $truncated = []; foreach ($tokens as $token) { if ($token instanceof \HTMLPurifier_Token_Start) { //Tag begins $openTokens[$depth] = $token->name; $truncated[] = $token; ++$depth; } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) { //Text if (false === $encoding) { preg_match('/^(\s*)/um', $token->data, $prefixSpace) ?: $prefixSpace = ['', '']; $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, ''); $currentCount = self::countWords($token->data); } else { $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding); $currentCount = mb_strlen($token->data, $encoding); } $totalCount += $currentCount; $truncated[] = $token; } elseif ($token instanceof \HTMLPurifier_Token_End) { //Tag ends if ($token->name === $openTokens[$depth - 1]) { --$depth; unset($openTokens[$depth]); $truncated[] = $token; } } elseif ($token instanceof \HTMLPurifier_Token_Empty) { //Self contained tags, i.e. etc. $truncated[] = $token; } if ($totalCount >= $count) { if (0 < count($openTokens)) { krsort($openTokens); foreach ($openTokens as $name) { $truncated[] = new \HTMLPurifier_Token_End($name); } } break; } } $context = new \HTMLPurifier_Context(); $generator = new \HTMLPurifier_Generator($config, $context); return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : ''); } /** * Check if given string starts with specified substring. Binary and multibyte safe. * * @param string $string Input string * @param string $with Part to search inside the $string * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, `$with` must * exactly match the starting of the string in order to get a true value. * @return bool Returns true if first input starts with second input, false otherwise */ public static function startsWith($string, $with, $caseSensitive = true) { if (!$bytes = static::byteLength($with)) { return true; } if ($caseSensitive) { return strncmp($string, $with, $bytes) === 0; } $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8'; $string = static::byteSubstr($string, 0, $bytes); return mb_strtolower($string, $encoding) === mb_strtolower($with, $encoding); } /** * Check if given string ends with specified substring. Binary and multibyte safe. * * @param string $string Input string to check * @param string $with Part to search inside of the `$string`. * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, `$with` must * exactly match the ending of the string in order to get a true value. * @return bool Returns true if first input ends with second input, false otherwise */ public static function endsWith($string, $with, $caseSensitive = true) { if (!$bytes = static::byteLength($with)) { return true; } if ($caseSensitive) { // Warning check, see https://php.net/substr-compare#refsect1-function.substr-compare-returnvalues if (static::byteLength($string) < $bytes) { return false; } return substr_compare($string, $with, -$bytes, $bytes) === 0; } $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8'; $string = static::byteSubstr($string, -$bytes); return mb_strtolower($string, $encoding) === mb_strtolower($with, $encoding); } /** * Explodes string into array, optionally trims values and skips empty ones. * * @param string $string String to be exploded. * @param string $delimiter Delimiter. Default is ','. * @param mixed $trim Whether to trim each element. Can be: * - boolean - to trim normally; * - string - custom characters to trim. Will be passed as a second argument to `trim()` function. * - callable - will be called for each value instead of trim. Takes the only argument - value. * @param bool $skipEmpty Whether to skip empty strings between delimiters. Default is false. * @return array * @since 2.0.4 */ public static function explode($string, $delimiter = ',', $trim = true, $skipEmpty = false) { $result = explode($delimiter, $string); if ($trim !== false) { if ($trim === true) { $trim = 'trim'; } elseif (!is_callable($trim)) { $trim = function ($v) use ($trim) { return trim($v, $trim); }; } $result = array_map($trim, $result); } if ($skipEmpty) { // Wrapped with array_values to make array keys sequential after empty values removing $result = array_values(array_filter($result, function ($value) { return $value !== ''; })); } return $result; } /** * Counts words in a string. * * @param string $string the text to calculate * @return int * @since 2.0.8 */ public static function countWords($string) { return count(preg_split('/\s+/u', $string, 0, PREG_SPLIT_NO_EMPTY)); } /** * Returns string representation of number value with replaced commas to dots, if decimal point * of current locale is comma. * * @param int|float|string $value the value to normalize. * @return string * @since 2.0.11 */ public static function normalizeNumber($value) { $value = (string) $value; $localeInfo = localeconv(); $decimalSeparator = isset($localeInfo['decimal_point']) ? $localeInfo['decimal_point'] : null; if ($decimalSeparator !== null && $decimalSeparator !== '.') { $value = str_replace($decimalSeparator, '.', $value); } return $value; } /** * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648). * * > Note: Base 64 padding `=` may be at the end of the returned string. * > `=` is not transparent to URL encoding. * * @param string $input the string to encode. * @return string encoded string. * @see https://tools.ietf.org/html/rfc4648#page-7 * @since 2.0.12 */ public static function base64UrlEncode($input) { return strtr(base64_encode($input), '+/', '-_'); } /** * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648). * * @param string $input encoded string. * @return string decoded string. * @see https://tools.ietf.org/html/rfc4648#page-7 * @since 2.0.12 */ public static function base64UrlDecode($input) { return base64_decode(strtr($input, '-_', '+/')); } /** * Safely casts a float to string independent of the current locale. * The decimal separator will always be `.`. * * @param float|int $number a floating point number or integer. * @return string the string representation of the number. * @since 2.0.13 */ public static function floatToString($number) { // . and , are the only decimal separators known in ICU data, // so its safe to call str_replace here return str_replace(',', '.', (string) $number); } /** * Checks if the passed string would match the given shell wildcard pattern. * This function emulates [[fnmatch()]], which may be unavailable at certain environment, using PCRE. * * @param string $pattern the shell wildcard pattern. * @param string $string the tested string. * @param array $options options for matching. Valid options are: * * - caseSensitive: bool, whether pattern should be case sensitive. Defaults to `true`. * - escape: bool, whether backslash escaping is enabled. Defaults to `true`. * - filePath: bool, whether slashes in string only matches slashes in the given pattern. Defaults to `false`. * * @return bool whether the string matches pattern or not. * @since 2.0.14 */ public static function matchWildcard($pattern, $string, $options = []) { if ($pattern === '*' && empty($options['filePath'])) { return true; } $replacements = [ '\\\\\\\\' => '\\\\', '\\\\\\*' => '[*]', '\\\\\\?' => '[?]', '\*' => '.*', '\?' => '.', '\[\!' => '[^', '\[' => '[', '\]' => ']', '\-' => '-', ]; if (isset($options['escape']) && !$options['escape']) { unset($replacements['\\\\\\\\']); unset($replacements['\\\\\\*']); unset($replacements['\\\\\\?']); } if (!empty($options['filePath'])) { $replacements['\*'] = '[^/\\\\]*'; $replacements['\?'] = '[^/\\\\]'; } $pattern = strtr(preg_quote($pattern, '#'), $replacements); $pattern = '#^' . $pattern . '$#us'; if (isset($options['caseSensitive']) && !$options['caseSensitive']) { $pattern .= 'i'; } return preg_match($pattern, (string)$string) === 1; } /** * This method provides a unicode-safe implementation of built-in PHP function `ucfirst()`. * * @param string $string the string to be proceeded * @param string $encoding Optional, defaults to "UTF-8" * @return string * @see https://www.php.net/manual/en/function.ucfirst.php * @since 2.0.16 */ public static function mb_ucfirst($string, $encoding = 'UTF-8') { $firstChar = mb_substr((string)$string, 0, 1, $encoding); $rest = mb_substr((string)$string, 1, null, $encoding); return mb_strtoupper($firstChar, $encoding) . $rest; } /** * This method provides a unicode-safe implementation of built-in PHP function `ucwords()`. * * @param string $string the string to be proceeded * @param string $encoding Optional, defaults to "UTF-8" * @return string * @see https://www.php.net/manual/en/function.ucwords * @since 2.0.16 */ public static function mb_ucwords($string, $encoding = 'UTF-8') { $string = (string) $string; if (empty($string)) { return $string; } $parts = preg_split('/(\s+[^\w]+\s+|^[^\w]+\s+|\s+)/u', $string, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); $ucfirstEven = trim(mb_substr($parts[0], -1, 1, $encoding)) === ''; foreach ($parts as $key => $value) { $isEven = (bool)($key % 2); if ($ucfirstEven === $isEven) { $parts[$key] = static::mb_ucfirst($value, $encoding); } } return implode('', $parts); } }