You've already forked joomla_test
first commit
This commit is contained in:
1
libraries/joomla/string/index.html
Normal file
1
libraries/joomla/string/index.html
Normal file
@ -0,0 +1 @@
|
||||
<!DOCTYPE html><title></title>
|
462
libraries/joomla/string/inflector.php
Normal file
462
libraries/joomla/string/inflector.php
Normal file
@ -0,0 +1,462 @@
|
||||
<?php
|
||||
/**
|
||||
* @package Joomla.Platform
|
||||
* @subpackage String
|
||||
*
|
||||
* @copyright Copyright (C) 2005 - 2011 Open Source Matters, Inc. All rights reserved.
|
||||
* @license GNU General Public License version 2 or later; see LICENSE
|
||||
*/
|
||||
|
||||
defined('JPATH_PLATFORM') or die;
|
||||
|
||||
/**
 * Joomla Platform String Inflector Class
 *
 * The Inflector transforms English words between their singular and plural
 * forms. Known pairs (including irregular ones) are kept in a
 * [singular => plural] cache; any other word is inflected by trying an ordered
 * list of regular expression rules and the result is cached.
 *
 * @package     Joomla.Platform
 * @subpackage  String
 * @since       12.1
 */
class JStringInflector
{
    /**
     * The singleton instance.
     *
     * @var    JStringInflector
     * @since  12.1
     */
    private static $_instance;

    /**
     * The inflector rules for singularisation, pluralisation and countability.
     *
     * The 'singular' and 'plural' entries map a regular expression to its
     * replacement. They are tried in order and the first matching pattern wins,
     * so the catch-all rules must stay at the end of each list. The 'countable'
     * entry is a plain list of words.
     *
     * @var    array
     * @since  12.1
     */
    private $_rules = array(
        'singular' => array(
            '/(matr)ices$/i' => '\1ix',
            '/(vert|ind)ices$/i' => '\1ex',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
            '/([ftw]ax)es/i' => '\1',
            '/(cris|ax|test)es$/i' => '\1is',
            '/(shoe|slave)s$/i' => '\1',
            '/(o)es$/i' => '\1',
            '/([^aeiouy]|qu)ies$/i' => '\1y',

            // The replacement used to be '\s', which preg_replace() inserts as a literal backslash
            // followed by "s" ("classes" became "clas\s"). The unmatchable '/$1ses$/i' rule was dropped.
            '/ses$/i' => 's',
            '/eaus$/' => 'eau',
            '/^(.*us)$/' => '\\1',
            '/s$/i' => '',
        ),
        'plural' => array(
            '/([ml])ouse$/i' => '\1ice',
            '/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
            '/(x|ch|ss|sh)$/i' => '\1es',
            '/([^aeiouy]|qu)y$/i' => '\1ies',

            // NOTE(review): this rule turns "...ies" into "...y" inside the plural list; kept as found, confirm intent.
            '/([^aeiouy]|qu)ies$/i' => '\1y',
            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
            '/sis$/i' => 'ses',
            '/([ti])um$/i' => '\1a',
            '/(buffal|tomat)o$/i' => '\1oes',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
            '/us$/i' => 'uses',
            '/(ax|cris|test)is$/i' => '\1es',
            '/s$/i' => 's',
            '/$/' => 's',
        ),
        'countable' => array(
            'id',
            'hits',
            'clicks',
        ),
    );

    /**
     * Cached inflections.
     *
     * The array is in the form [singular => plural]. Both sides are stored in lower case.
     *
     * @var    array
     * @since  12.1
     */
    private $_cache = array();

    /**
     * Protected constructor.
     *
     * Seeds the cache with words whose plural cannot be derived from the regex rules.
     *
     * @since  12.1
     */
    protected function __construct()
    {
        // Pre-populate the irregular singular/plural pairs.
        $this
            // Words whose singular and plural forms are identical.
            ->addWord('deer')
            ->addWord('moose')
            ->addWord('sheep')
            ->addWord('bison')
            ->addWord('salmon')
            ->addWord('pike')
            ->addWord('trout')
            ->addWord('fish')
            ->addWord('swine')

            // Words with an explicit plural.
            ->addWord('alias', 'aliases')
            ->addWord('bus', 'buses')
            ->addWord('foot', 'feet')
            ->addWord('goose', 'geese')
            ->addWord('hive', 'hives')
            ->addWord('louse', 'lice')
            ->addWord('man', 'men')
            ->addWord('mouse', 'mice')
            ->addWord('ox', 'oxen')
            ->addWord('quiz', 'quizzes')
            ->addWord('status', 'statuses')
            ->addWord('tooth', 'teeth')
            ->addWord('woman', 'women');
    }

    /**
     * Adds inflection regex rules to the inflector.
     *
     * Entries with a string key are treated as "pattern => replacement" pairs and are placed in
     * front of the existing rules of that type, so that they are tried before the built-in
     * catch-all rules. Entries with a numeric key are appended as plain values, as before.
     * Words that were already inflected stay in the cache and are not re-evaluated.
     *
     * @param   mixed   $data      A string or an array of strings or regex rules to add.
     * @param   string  $ruleType  The rule type: singular | plural | countable
     *
     * @return  void
     *
     * @since   12.1
     * @throws  InvalidArgumentException
     */
    private function _addRule($data, $ruleType)
    {
        if (is_string($data))
        {
            $data = array($data);
        }
        elseif (!is_array($data))
        {
            // Do not translate.
            throw new InvalidArgumentException('Invalid inflector rule data.');
        }

        $keyed = array();

        foreach ($data as $key => $rule)
        {
            if (is_string($key))
            {
                // Keep the pattern: without it the replacement could never be applied.
                $keyed[$key] = (string) $rule;
            }
            else
            {
                // Ensure a string is pushed.
                $this->_rules[$ruleType][] = (string) $rule;
            }
        }

        if ($keyed)
        {
            // Array union keeps the left-hand order, so the new rules take precedence.
            $this->_rules[$ruleType] = $keyed + $this->_rules[$ruleType];
        }
    }

    /**
     * Gets an inflected word from the cache where the singular form is supplied.
     *
     * @param   string  $singular  A singular form of a word.
     *
     * @return  mixed  The cached inflection or false if none found.
     *
     * @since   12.1
     */
    private function _getCachedPlural($singular)
    {
        $singular = JString::strtolower($singular);

        // Check if the word is in cache.
        return isset($this->_cache[$singular]) ? $this->_cache[$singular] : false;
    }

    /**
     * Gets an inflected word from the cache where the plural form is supplied.
     *
     * @param   string  $plural  A plural form of a word.
     *
     * @return  mixed  The cached inflection or false if none found.
     *
     * @since   12.1
     */
    private function _getCachedSingular($plural)
    {
        $plural = JString::strtolower($plural);

        // Strict search: loose comparison would treat numeric-looking strings as equal.
        $singular = array_search($plural, $this->_cache, true);

        // PHP turns numeric string keys into integers; always hand back a string.
        return ($singular === false) ? false : (string) $singular;
    }

    /**
     * Execute a regex from rules.
     *
     * The 'plural' rule type expects a singular word.
     * The 'singular' rule type expects a plural word.
     *
     * @param   string  $word      The string input.
     * @param   string  $ruleType  String (eg, singular|plural)
     *
     * @return  mixed  An inflected string, or false if no rule could be applied.
     *
     * @since   12.1
     */
    private function _matchRegexRule($word, $ruleType)
    {
        // Cycle through the regex rules; the first one that matches decides the result.
        foreach ($this->_rules[$ruleType] as $regex => $replacement)
        {
            // Rules added without a pattern key have an integer key, which is not a usable regex.
            if (!is_string($regex))
            {
                continue;
            }

            $matches = 0;
            $matchedWord = preg_replace($regex, $replacement, $word, -1, $matches);

            if ($matches > 0)
            {
                return $matchedWord;
            }
        }

        return false;
    }

    /**
     * Sets an inflected word in the cache.
     *
     * @param   string  $singular  The singular form of the word.
     * @param   string  $plural    The plural form of the word. If omitted, it is assumed the singular and plural are identical.
     *
     * @return  void
     *
     * @since   12.1
     */
    private function _setCache($singular, $plural = null)
    {
        $singular = JString::strtolower($singular);

        $this->_cache[$singular] = ($plural === null) ? $singular : JString::strtolower($plural);
    }

    /**
     * Adds a countable word.
     *
     * @param   mixed  $data  A string or an array of strings to add.
     *
     * @return  JStringInflector  Returns this object to support chaining.
     *
     * @since   12.1
     */
    public function addCountableRule($data)
    {
        $this->_addRule($data, 'countable');

        return $this;
    }

    /**
     * Adds a specific singular-plural pair for a word.
     *
     * @param   string  $singular  The singular form of the word.
     * @param   string  $plural    The plural form of the word. If omitted, it is assumed the singular and plural are identical.
     *
     * @return  JStringInflector  Returns this object to support chaining.
     *
     * @since   12.1
     */
    public function addWord($singular, $plural = null)
    {
        $this->_setCache($singular, $plural);

        return $this;
    }

    /**
     * Adds a pluralisation rule.
     *
     * Pass an array of "pattern => replacement" pairs for the rules to be applied by toPlural().
     *
     * @param   mixed  $data  A string or an array of regex rules to add.
     *
     * @return  JStringInflector  Returns this object to support chaining.
     *
     * @since   12.1
     */
    public function addPluraliseRule($data)
    {
        $this->_addRule($data, 'plural');

        return $this;
    }

    /**
     * Adds a singularisation rule.
     *
     * Pass an array of "pattern => replacement" pairs for the rules to be applied by toSingular().
     *
     * @param   mixed  $data  A string or an array of regex rules to add.
     *
     * @return  JStringInflector  Returns this object to support chaining.
     *
     * @since   12.1
     */
    public function addSingulariseRule($data)
    {
        $this->_addRule($data, 'singular');

        return $this;
    }

    /**
     * Gets an instance of the JStringInflector singleton.
     *
     * @param   boolean  $new  If true (default is false), returns a new instance regardless if one exists.
     *                         This argument is mainly used for testing.
     *
     * @return  JStringInflector
     *
     * @since   12.1
     */
    public static function getInstance($new = false)
    {
        if ($new)
        {
            // A fresh instance is handed out without touching the shared one.
            return new static;
        }

        if (!is_object(self::$_instance))
        {
            self::$_instance = new static;
        }

        return self::$_instance;
    }

    /**
     * Checks if a word is countable.
     *
     * @param   string  $word  The string input.
     *
     * @return  boolean  True if word is countable, false otherwise.
     *
     * @since   12.1
     */
    public function isCountable($word)
    {
        return in_array($word, $this->_rules['countable'], true);
    }

    /**
     * Checks if a word is in a plural form.
     *
     * @param   string  $word  The string input.
     *
     * @return  boolean  True if word is plural, false if not.
     *
     * @since   12.1
     */
    public function isPlural($word)
    {
        // Try the cache for a known inflection.
        if ($this->_getCachedSingular($word) !== false)
        {
            return true;
        }

        // Compute the inflection to cache the values, and compare.
        $singular = $this->toSingular($word);

        if ($singular === false)
        {
            // Nothing to round-trip; passing false on would cache a bogus '' => 's' pair.
            return false;
        }

        return $this->toPlural($singular) == $word;
    }

    /**
     * Checks if a word is in a singular form.
     *
     * @param   string  $word  The string input.
     *
     * @return  boolean  True if word is singular, false if not.
     *
     * @since   12.1
     */
    public function isSingular($word)
    {
        // Try the cache for a known inflection.
        if ($this->_getCachedPlural($word) !== false)
        {
            return true;
        }

        // Compute the inflection to cache the values, and compare.
        $plural = $this->toPlural($word);

        if ($plural === false)
        {
            // Nothing to round-trip; see isPlural().
            return false;
        }

        return $this->toSingular($plural) == $word;
    }

    /**
     * Converts a word into its plural form.
     *
     * @param   string  $word  The singular word to pluralise.
     *
     * @return  mixed  An inflected string, or false if no rule could be applied
     *                 (which includes the case where the word is already a known plural).
     *
     * @since   12.1
     */
    public function toPlural($word)
    {
        // Try to get the cached plural form from the singular.
        $cache = $this->_getCachedPlural($word);

        if ($cache !== false)
        {
            return $cache;
        }

        // Check if the word is a known plural; it cannot be pluralised again.
        if ($this->_getCachedSingular($word) !== false)
        {
            return false;
        }

        // Compute the inflection.
        $inflected = $this->_matchRegexRule($word, 'plural');

        if ($inflected !== false)
        {
            $this->_setCache($word, $inflected);
        }

        return $inflected;
    }

    /**
     * Converts a word into its singular form.
     *
     * @param   string  $word  The plural word to singularise.
     *
     * @return  mixed  An inflected string, or false if no rule could be applied
     *                 (which includes the case where the word is already a known singular).
     *
     * @since   12.1
     */
    public function toSingular($word)
    {
        // Try to get the cached singular form from the plural.
        $cache = $this->_getCachedSingular($word);

        if ($cache !== false)
        {
            return $cache;
        }

        // Check if the word is a known singular; it cannot be singularised again.
        if ($this->_getCachedPlural($word) !== false)
        {
            return false;
        }

        // Compute the inflection.
        $inflected = $this->_matchRegexRule($word, 'singular');

        if ($inflected !== false)
        {
            $this->_setCache($inflected, $word);
        }

        return $inflected;
    }
}
|
165
libraries/joomla/string/normalise.php
Normal file
165
libraries/joomla/string/normalise.php
Normal file
@ -0,0 +1,165 @@
|
||||
<?php
|
||||
/**
|
||||
* @package Joomla.Platform
|
||||
* @subpackage String
|
||||
*
|
||||
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
|
||||
* @license GNU General Public License version 2 or later; see LICENSE
|
||||
*/
|
||||
|
||||
defined('JPATH_PLATFORM') or die;
|
||||
|
||||
/**
 * Joomla Platform String Normalise Class
 *
 * Static helpers that convert ASCII identifiers between camel case and
 * space, dash or underscore separated forms.
 *
 * @package     Joomla.Platform
 * @subpackage  String
 * @since       11.3
 */
abstract class JStringNormalise
{
    /**
     * Method to convert a string from camel case.
     *
     * This method offers two modes. Grouped allows for splitting on groups of uppercase characters as follows:
     *
     * "FooBarABCDef"            becomes  array("Foo", "Bar", "ABC", "Def")
     * "JFooBar"                 becomes  array("J", "Foo", "Bar")
     * "J001FooBar002"           becomes  array("J001", "Foo", "Bar002")
     * "abcDef"                  becomes  array("abc", "Def")
     * "abc_defGhi_Jkl"          becomes  array("abc_def", "Ghi_Jkl")
     * "ThisIsA_NASAAstronaut"   becomes  array("This", "Is", "A_NASA", "Astronaut")
     * "JohnFitzgerald_Kennedy"  becomes  array("John", "Fitzgerald_Kennedy")
     *
     * Non-grouped will split strings at each uppercase character.
     *
     * @param   string   $input    The string input (ASCII only).
     * @param   boolean  $grouped  Optionally allows splitting on groups of uppercase characters.
     *
     * @return  mixed  The space separated string, or an array of substrings when $grouped is true.
     *
     * @since   12.1
     */
    public static function fromCamelCase($input, $grouped = false)
    {
        if ($grouped)
        {
            // Split before an uppercase letter that follows a non-uppercase, non-underscore character,
            // or before the last letter of an uppercase run when that letter starts a new word.
            return preg_split('/(?<=[^A-Z_])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][^A-Z_])/x', $input);
        }

        // Put a space in front of every uppercase letter, then drop the one added at the start.
        return trim(preg_replace('#([A-Z])#', ' $1', $input));
    }

    /**
     * Method to convert a string into camel case.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The camel case string.
     *
     * @since   11.3
     */
    public static function toCamelCase($input)
    {
        // Convert words to uppercase and then remove spaces.
        $input = self::toSpaceSeparated($input);
        $input = ucwords($input);
        $input = str_ireplace(' ', '', $input);

        return $input;
    }

    /**
     * Method to convert a string into dash separated form.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The dash separated string.
     *
     * @since   11.3
     */
    public static function toDashSeparated($input)
    {
        // Convert runs of spaces, dashes and underscores to a single dash.
        return preg_replace('#[ \-_]+#', '-', $input);
    }

    /**
     * Method to convert a string into space separated form.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The space separated string.
     *
     * @since   11.3
     */
    public static function toSpaceSeparated($input)
    {
        // Convert runs of spaces, dashes and underscores to a single space.
        return preg_replace('#[ \-_]+#', ' ', $input);
    }

    /**
     * Method to convert a string into underscore separated form.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The underscore separated string.
     *
     * @since   11.3
     */
    public static function toUnderscoreSeparated($input)
    {
        // Convert runs of spaces, dashes and underscores to a single underscore.
        return preg_replace('#[ \-_]+#', '_', $input);
    }

    /**
     * Method to convert a string into variable form.
     *
     * The result is the camel case form of the input with any leading digits removed
     * and the first character in lower case.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The variable string.
     *
     * @since   11.3
     */
    public static function toVariable($input)
    {
        // Remove dashes and underscores, then convert to camel case.
        $input = self::toSpaceSeparated($input);
        $input = self::toCamelCase($input);

        // Remove leading digits only. The previous pattern ended in ".*$" and therefore
        // wiped out the entire string whenever it started with a digit.
        $input = preg_replace('#^[0-9]+#', '', $input);

        // Lowercase the first character.
        return lcfirst($input);
    }

    /**
     * Method to convert a string into key form.
     *
     * @param   string  $input  The string input (ASCII only).
     *
     * @return  string  The key string.
     *
     * @since   11.3
     */
    public static function toKey($input)
    {
        // Replace spaces and dashes with underscores, then convert to lower case.
        return strtolower(self::toUnderscoreSeparated($input));
    }
}
|
246
libraries/joomla/string/punycode.php
Normal file
246
libraries/joomla/string/punycode.php
Normal file
@ -0,0 +1,246 @@
|
||||
<?php
|
||||
/**
|
||||
* @package Joomla.Platform
|
||||
* @subpackage String
|
||||
*
|
||||
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
|
||||
* @license GNU General Public License version 2 or later; see LICENSE
|
||||
*/
|
||||
|
||||
defined('JPATH_PLATFORM') or die;
|
||||
|
||||
JLoader::register('idna_convert', JPATH_ROOT . '/libraries/idna_convert/idna_convert.class.php');
|
||||
|
||||
/**
 * Joomla Platform String Punycode Class
 *
 * Class for handling UTF-8 URLs
 * Wraps the Punycode library
 * All functions assume the validity of utf-8 URLs.
 *
 * @package     Joomla.Platform
 * @subpackage  String
 * @since       3.1.2
 */
abstract class JStringPunycode
{
    /**
     * Transforms a UTF-8 string to a Punycode string
     *
     * @param   string  $utfString  The UTF-8 string to transform
     *
     * @return  string  The punycode string
     *
     * @since   3.1.2
     */
    public static function toPunycode($utfString)
    {
        $idn = new idna_convert;

        return $idn->encode($utfString);
    }

    /**
     * Transforms a Punycode string to a UTF-8 string
     *
     * @param   string  $punycodeString  The Punycode string to transform
     *
     * @return  string  The UTF-8 string
     *
     * @since   3.1.2
     */
    public static function fromPunycode($punycodeString)
    {
        $idn = new idna_convert;

        return $idn->decode($punycodeString);
    }

    /**
     * Transforms a UTF-8 URL to a Punycode URL
     *
     * Only the host is converted. Scheme, port, path, query and fragment are copied as they are.
     * NOTE(review): user/password components are not read here and are therefore not carried over.
     *
     * @param   string  $uri  The UTF-8 URL to transform
     *
     * @return  string  The punycode URL, or the unchanged input when it has no host.
     *
     * @since   3.1.2
     */
    public static function urlToPunycode($uri)
    {
        // Presumably returns the same component array as PHP's parse_url() - verify against JString::parse_url.
        $parsed = JString::parse_url($uri);

        if (!isset($parsed['host']) || $parsed['host'] == '')
        {
            // If there is no host we do not need to convert it.
            return $uri;
        }

        return static::buildUri($parsed, static::convertDomain($parsed['host'], 'toPunycode'));
    }

    /**
     * Transforms a Punycode URL to a UTF-8 URL
     *
     * Only the host is converted. Scheme, port, path, query and fragment are copied as they are.
     * NOTE(review): user/password components are not read here and are therefore not carried over.
     *
     * @param   string  $uri  The Punycode URL to transform
     *
     * @return  string  The UTF-8 URL, the unchanged input when it has no host, or null for an empty input.
     *
     * @since   3.1.2
     */
    public static function urlToUTF8($uri)
    {
        if (empty($uri))
        {
            return null;
        }

        // Presumably returns the same component array as PHP's parse_url() - verify against JString::parse_url.
        $parsed = JString::parse_url($uri);

        if (!isset($parsed['host']) || $parsed['host'] == '')
        {
            // If there is no host we do not need to convert it.
            return $uri;
        }

        return static::buildUri($parsed, static::convertDomain($parsed['host'], 'fromPunycode'));
    }

    /**
     * Transforms a UTF-8 e-mail to a Punycode e-mail
     * This assumes a valid email address
     *
     * @param   string  $email  The UTF-8 e-mail to transform
     *
     * @return  string  The punycode e-mail
     *
     * @since   3.1.2
     */
    public static function emailToPunycode($email)
    {
        return static::convertEmail($email, 'toPunycode');
    }

    /**
     * Transforms a Punycode e-mail to a UTF-8 e-mail
     * This assumes a valid email address
     *
     * @param   string  $email  The punycode e-mail to transform
     *
     * @return  string  The UTF-8 e-mail
     *
     * @since   3.1.2
     */
    public static function emailToUTF8($email)
    {
        return static::convertEmail($email, 'fromPunycode');
    }

    /**
     * Converts every dot separated label of a domain with one of the conversion methods of this class.
     *
     * @param   string  $domain  The host or domain name.
     * @param   string  $method  The static method to apply to each label: toPunycode | fromPunycode
     *
     * @return  string  The converted domain.
     *
     * @since   3.1.2
     */
    protected static function convertDomain($domain, $method)
    {
        $converted = '';

        foreach (explode('.', $domain) as $label)
        {
            $converted .= static::$method($label) . '.';
        }

        // Drop the dot appended after the last label.
        return substr($converted, 0, -1);
    }

    /**
     * Converts the domain part of an e-mail address, leaving the local part untouched.
     *
     * @param   string  $email   The e-mail address.
     * @param   string  $method  The static method to apply to each domain label: toPunycode | fromPunycode
     *
     * @return  string  The converted e-mail address.
     *
     * @since   3.1.2
     */
    protected static function convertEmail($email, $method)
    {
        $explodedAddress = explode('@', $email);

        // Not addressing UTF-8 user names
        $newEmail = $explodedAddress[0];

        if (!empty($explodedAddress[1]))
        {
            $newEmail .= '@' . static::convertDomain($explodedAddress[1], $method);
        }

        return $newEmail;
    }

    /**
     * Re-assembles a URL from its parsed components and an already converted host.
     *
     * @param   array   $parsed  The URL components (scheme, port, path, query, fragment are read when not empty).
     * @param   string  $host    The converted host.
     *
     * @return  string  The assembled URL.
     *
     * @since   3.1.2
     */
    protected static function buildUri($parsed, $host)
    {
        $newuri = '';

        if (!empty($parsed['scheme']))
        {
            // Assume :// is required although it is not always.
            $newuri .= $parsed['scheme'] . '://';
        }

        if (!empty($host))
        {
            $newuri .= $host;
        }

        if (!empty($parsed['port']))
        {
            // Previously dropped, which silently redirected the URL to the default port.
            $newuri .= ':' . $parsed['port'];
        }

        if (!empty($parsed['path']))
        {
            $newuri .= $parsed['path'];
        }

        if (!empty($parsed['query']))
        {
            $newuri .= '?' . $parsed['query'];
        }

        if (!empty($parsed['fragment']))
        {
            // Previously dropped as well.
            $newuri .= '#' . $parsed['fragment'];
        }

        return $newuri;
    }
}
|
969
libraries/joomla/string/string.php
Normal file
969
libraries/joomla/string/string.php
Normal file
@ -0,0 +1,969 @@
|
||||
<?php
|
||||
/**
|
||||
* @package Joomla.Platform
|
||||
* @subpackage String
|
||||
*
|
||||
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved
|
||||
* @license GNU General Public License version 2 or later; see LICENSE
|
||||
*/
|
||||
|
||||
defined('JPATH_PLATFORM') or die;
|
||||
|
||||
// PHP mbstring and iconv local configuration

// When the mbstring extension is loaded, ask it to use UTF-8 (no attempt is made to load it).
if (extension_loaded('mbstring'))
{
    // Make sure to suppress the output in case ini_set is disabled
    @ini_set('mbstring.internal_encoding', 'UTF-8');
    @ini_set('mbstring.http_input', 'UTF-8');
    @ini_set('mbstring.http_output', 'UTF-8');
}

// Same for iconv
if (function_exists('iconv'))
{
    if (version_compare(PHP_VERSION, '5.6', '>='))
    {
        // The iconv.* encoding settings are deprecated as of PHP 5.6 and fall back to
        // default_charset, so set that instead of triggering deprecation notices.
        @ini_set('default_charset', 'UTF-8');
    }
    else
    {
        // These are settings that can be set inside code
        iconv_set_encoding("internal_encoding", "UTF-8");
        iconv_set_encoding("input_encoding", "UTF-8");
        iconv_set_encoding("output_encoding", "UTF-8");
    }
}
|
||||
|
||||
/**
|
||||
* Include the utf8 package
|
||||
*/
|
||||
jimport('phputf8.utf8');
|
||||
jimport('phputf8.strcasecmp');
|
||||
|
||||
/**
|
||||
* String handling class for utf-8 data
|
||||
* Wraps the phputf8 library
|
||||
* All functions assume the validity of utf-8 strings.
|
||||
*
|
||||
* @package Joomla.Platform
|
||||
* @subpackage String
|
||||
* @since 11.1
|
||||
*/
|
||||
abstract class JString
|
||||
{
|
||||
/**
 * Increment styles.
 *
 * Maps a style name to a two element list read by increment():
 * element 0 is the regular expression and element 1 the sprintf() format.
 * Either element may itself be a pair; increment() then uses the first
 * pattern to detect an existing counter and the second to replace it, and
 * the first format to append a new counter and the second to rewrite one.
 *
 * @var    array
 * @since  11.3
 */
protected static $incrementStyles = array(
    'dash'    => array('#-(\d+)$#', '-%d'),
    'default' => array(
        array('#\((\d+)\)$#', '#\(\d+\)$#'),
        array(' (%d)', '(%d)'),
    ),
);
|
||||
|
||||
/**
 * Split a string in camel case format
 *
 * "FooBarABCDef"            becomes  array("Foo", "Bar", "ABC", "Def");
 * "JFooBar"                 becomes  array("J", "Foo", "Bar");
 * "J001FooBar002"           becomes  array("J001", "Foo", "Bar002");
 * "abcDef"                  becomes  array("abc", "Def");
 * "abc_defGhi_Jkl"          becomes  array("abc_def", "Ghi_Jkl");
 * "ThisIsA_NASAAstronaut"   becomes  array("This", "Is", "A_NASA", "Astronaut");
 * "JohnFitzgerald_Kennedy"  becomes  array("John", "Fitzgerald_Kennedy");
 *
 * @param   string  $string  The source string.
 *
 * @return  array   The split string.
 *
 * @deprecated  12.3 (Platform) & 4.0 (CMS) - Use JStringNormalise::fromCamelCase()
 * @since   11.3
 */
public static function splitCamelCase($string)
{
    // Log the use of this deprecated entry point before delegating.
    JLog::add('JString::splitCamelCase has been deprecated. Use JStringNormalise::fromCamelCase.', JLog::WARNING, 'deprecated');

    // Grouped mode of the replacement method.
    $parts = JStringNormalise::fromCamelCase($string, true);

    return $parts;
}
|
||||
|
||||
/**
 * Increments a trailing number in a string.
 *
 * Used to easily create distinct labels when copying objects. The method has the following styles:
 *
 * default: "Label" becomes "Label (2)"
 * dash:    "Label" becomes "Label-2"
 *
 * An unknown style name falls back to the default style.
 *
 * @param   string   $string  The source string.
 * @param   string   $style   The style (default|dash).
 * @param   integer  $n       If supplied, this number is used for the copy, otherwise it is the 'next' number.
 *
 * @return  string  The incremented string.
 *
 * @since   11.3
 */
public static function increment($string, $style = 'default', $n = 0)
{
    $styleName = isset(self::$incrementStyles[$style]) ? $style : 'default';
    $patterns  = self::$incrementStyles[$styleName][0];
    $formats   = self::$incrementStyles[$styleName][1];

    // A single pattern serves for both searching and replacing; a pair is (search, replace).
    $rxSearch  = is_array($patterns) ? $patterns[0] : $patterns;
    $rxReplace = is_array($patterns) ? $patterns[1] : $patterns;

    // A single format serves for both cases; a pair is (new, existing).
    $newFormat = is_array($formats) ? $formats[0] : $formats;
    $oldFormat = is_array($formats) ? $formats[1] : $formats;

    if (!preg_match($rxSearch, $string, $found))
    {
        // No trailing counter yet: append one, starting at 2 unless a number was given.
        return $string . sprintf($newFormat, empty($n) ? 2 : $n);
    }

    // A counter exists: replace it with the requested number or the next one.
    $next = empty($n) ? ($found[1] + 1) : $n;

    return preg_replace($rxReplace, sprintf($oldFormat, $next), $string);
}
|
||||
|
||||
/**
 * UTF-8 aware alternative to strpos.
 *
 * Find position of first occurrence of a string.
 *
 * @param   string   $str     String being examined
 * @param   string   $search  String being searched for
 * @param   integer  $offset  Optional, specifies the position from which the search should be performed
 *
 * @return  mixed  Number of characters before the first match or FALSE on failure
 *
 * @see     http://www.php.net/strpos
 * @since   11.1
 */
public static function strpos($str, $search, $offset = false)
{
    // Only forward the offset when the caller supplied one.
    return ($offset === false)
        ? utf8_strpos($str, $search)
        : utf8_strpos($str, $search, $offset);
}
|
||||
|
||||
/**
 * UTF-8 aware alternative to strrpos
 * Finds position of last occurrence of a string
 *
 * @param   string   $str     String being examined.
 * @param   string   $search  String being searched for.
 * @param   integer  $offset  Offset from the left of the string.
 *
 * @return  mixed  Number of characters before the last match or false on failure
 *
 * @see     http://www.php.net/strrpos
 * @since   11.1
 */
public static function strrpos($str, $search, $offset = 0)
{
    // Delegate to the phputf8 implementation.
    $position = utf8_strrpos($str, $search, $offset);

    return $position;
}
|
||||
|
||||
/**
 * UTF-8 aware alternative to substr
 * Return part of a string given character offset (and optionally length)
 *
 * @param   string   $str     String being processed
 * @param   integer  $offset  Number of UTF-8 characters offset (from left)
 * @param   integer  $length  Optional length in UTF-8 characters from offset
 *
 * @return  mixed  string or FALSE if failure
 *
 * @see     http://www.php.net/substr
 * @since   11.1
 */
public static function substr($str, $offset, $length = false)
{
    if ($length !== false)
    {
        return utf8_substr($str, $offset, $length);
    }

    // No length supplied: let the library apply its own default.
    return utf8_substr($str, $offset);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strtlower
|
||||
*
|
||||
* Make a string lowercase
|
||||
* Note: The concept of a characters "case" only exists is some alphabets
|
||||
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
|
||||
* not exist in the Chinese alphabet, for example. See Unicode Standard
|
||||
* Annex #21: Case Mappings
|
||||
*
|
||||
* @param string $str String being processed
|
||||
*
|
||||
* @return mixed Either string in lowercase or FALSE is UTF-8 invalid
|
||||
*
|
||||
* @see http://www.php.net/strtolower
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strtolower($str)
{
	// Case folding is done entirely by the phputf8 helper.
	$lowered = utf8_strtolower($str);

	return $lowered;
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strtoupper
|
||||
* Make a string uppercase
|
||||
* Note: The concept of a character's "case" only exists in some alphabets
|
||||
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
|
||||
* not exist in the Chinese alphabet, for example. See Unicode Standard
|
||||
* Annex #21: Case Mappings
|
||||
*
|
||||
* @param string $str String being processed
|
||||
*
|
||||
* @return mixed Either string in uppercase or FALSE if the UTF-8 is invalid
|
||||
*
|
||||
* @see http://www.php.net/strtoupper
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strtoupper($str)
{
	// Case folding is done entirely by the phputf8 helper.
	$raised = utf8_strtoupper($str);

	return $raised;
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strlen.
|
||||
*
|
||||
* Returns the number of characters in the string (NOT THE NUMBER OF BYTES).
|
||||
*
|
||||
* @param string $str UTF-8 string.
|
||||
*
|
||||
* @return integer Number of UTF-8 characters in string.
|
||||
*
|
||||
* @see http://www.php.net/strlen
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strlen($str)
{
	// The phputf8 helper does the counting; this method only forwards the call.
	$characters = utf8_strlen($str);

	return $characters;
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to str_ireplace
|
||||
* Case-insensitive version of str_replace
|
||||
*
|
||||
* @param string $search String to search
|
||||
* @param string $replace The replacement string
|
||||
* @param string $str The string being searched and replaced on
|
||||
* @param integer $count Optional count value, passed through to utf8_ireplace when supplied
|
||||
*
|
||||
* @return string UTF-8 String
|
||||
*
|
||||
* @see http://www.php.net/str_ireplace
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function str_ireplace($search, $replace, $str, $count = null)
{
	jimport('phputf8.str_ireplace');

	// $count defaults to null, so null must be recognised as "not supplied".
	// The previous check only looked for false, which left the three-argument
	// call unreachable for callers that simply omit the argument. False is
	// still accepted so existing callers that pass it explicitly are unaffected.
	if ($count === null || $count === false)
	{
		return utf8_ireplace($search, $replace, $str);
	}

	return utf8_ireplace($search, $replace, $str, $count);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to str_split
|
||||
* Convert a string to an array
|
||||
*
|
||||
* @param string $str UTF-8 encoded string to process
|
||||
* @param integer $split_len Number of characters to split string by
|
||||
*
|
||||
* @return array
|
||||
*
|
||||
* @see http://www.php.net/str_split
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function str_split($str, $split_len = 1)
{
	// The splitter lives in an optional phputf8 file, so load it before use.
	jimport('phputf8.str_split');

	$pieces = utf8_str_split($str, $split_len);

	return $pieces;
}
|
||||
|
||||
/**
|
||||
* UTF-8/LOCALE aware alternative to strcasecmp
|
||||
* A case insensitive string comparison
|
||||
*
|
||||
* @param string $str1 string 1 to compare
|
||||
* @param string $str2 string 2 to compare
|
||||
* @param mixed $locale The locale used by strcoll or false to use classical comparison
|
||||
*
|
||||
* @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
|
||||
*
|
||||
* @see http://www.php.net/strcasecmp
|
||||
* @see http://www.php.net/strcoll
|
||||
* @see http://www.php.net/setlocale
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strcasecmp($str1, $str2, $locale = false)
{
	// Without a locale there is no collation involved: plain UTF-8 comparison.
	if (!$locale)
	{
		return utf8_strcasecmp($str1, $str2);
	}

	// Remember the collation locale in effect, then try to switch to the requested one.
	$previous = setlocale(LC_COLLATE, 0);
	$locale = setlocale(LC_COLLATE, $locale);

	if (!$locale)
	{
		// The switch failed, so carry on with the locale that was already active.
		$locale = $previous;
	}

	// A non-UTF-8 locale of the form xx_YY.<digits> names a code page we can recode to.
	// Every other locale (UTF-8 or unrecognised) is compared without recoding.
	$codePage = null;

	if (!stristr($locale, 'UTF-8') && stristr($locale, '_') && preg_match('~\.(\d+)$~', $locale, $digits))
	{
		$codePage = 'CP' . $digits[1];
	}

	$lower1 = utf8_strtolower($str1);
	$lower2 = utf8_strtolower($str2);

	if ($codePage === null)
	{
		return strcoll($lower1, $lower2);
	}

	return strcoll(
		self::transcode($lower1, 'UTF-8', $codePage),
		self::transcode($lower2, 'UTF-8', $codePage)
	);
}
|
||||
|
||||
/**
|
||||
* UTF-8/LOCALE aware alternative to strcmp
|
||||
* A case sensitive string comparison
|
||||
*
|
||||
* @param string $str1 string 1 to compare
|
||||
* @param string $str2 string 2 to compare
|
||||
* @param mixed $locale The locale used by strcoll or false to use classical comparison
|
||||
*
|
||||
* @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
|
||||
*
|
||||
* @see http://www.php.net/strcmp
|
||||
* @see http://www.php.net/strcoll
|
||||
* @see http://www.php.net/setlocale
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strcmp($str1, $str2, $locale = false)
{
	// Without a locale this is an ordinary byte-wise comparison.
	if (!$locale)
	{
		return strcmp($str1, $str2);
	}

	// Remember the collation locale in effect, then try to switch to the requested one.
	$previous = setlocale(LC_COLLATE, 0);
	$locale = setlocale(LC_COLLATE, $locale);

	if (!$locale)
	{
		// The switch failed, so carry on with the locale that was already active.
		$locale = $previous;
	}

	// A non-UTF-8 locale of the form xx_YY.<digits> names a code page we can recode to.
	// Every other locale (UTF-8 or unrecognised) is compared without recoding.
	$codePage = null;

	if (!stristr($locale, 'UTF-8') && stristr($locale, '_') && preg_match('~\.(\d+)$~', $locale, $digits))
	{
		$codePage = 'CP' . $digits[1];
	}

	if ($codePage === null)
	{
		return strcoll($str1, $str2);
	}

	return strcoll(
		self::transcode($str1, 'UTF-8', $codePage),
		self::transcode($str2, 'UTF-8', $codePage)
	);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strcspn
|
||||
* Find length of initial segment not matching mask
|
||||
*
|
||||
* @param string $str The string to process
|
||||
* @param string $mask The mask
|
||||
* @param integer $start Optional starting character position (in characters)
|
||||
* @param integer $length Optional length
|
||||
*
|
||||
* @return integer The length of the initial segment of str1 which does not contain any of the characters in str2
|
||||
*
|
||||
* @see http://www.php.net/strcspn
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strcspn($str, $mask, $start = null, $length = null)
{
	jimport('phputf8.strcspn');

	// Both optional arguments default to null, so null has to count as
	// "not supplied" (as the sibling strspn() already does). The earlier
	// checks only looked for false, which made the two shorter calls
	// unreachable by default. False is still honoured for existing callers.
	$startGiven = ($start !== null && $start !== false);
	$lengthGiven = ($length !== null && $length !== false);

	if (!$startGiven && !$lengthGiven)
	{
		return utf8_strcspn($str, $mask);
	}

	if (!$lengthGiven)
	{
		return utf8_strcspn($str, $mask, $start);
	}

	// A length was given: forward $start untouched, whatever it is.
	return utf8_strcspn($str, $mask, $start, $length);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to stristr
|
||||
* Returns all of haystack from the first occurrence of needle to the end.
|
||||
* needle and haystack are examined in a case-insensitive manner
|
||||
* Find first occurrence of a string using case insensitive comparison
|
||||
*
|
||||
* @param string $str The haystack
|
||||
* @param string $search The needle
|
||||
*
|
||||
* @return string the sub string
|
||||
*
|
||||
* @see http://www.php.net/stristr
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function stristr($str, $search)
{
	// The helper lives in an optional phputf8 file, so load it before use.
	jimport('phputf8.stristr');

	$tail = utf8_stristr($str, $search);

	return $tail;
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strrev
|
||||
* Reverse a string
|
||||
*
|
||||
* @param string $str String to be reversed
|
||||
*
|
||||
* @return string The string in reverse character order
|
||||
*
|
||||
* @see http://www.php.net/strrev
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strrev($str)
{
	// The helper lives in an optional phputf8 file, so load it before use.
	jimport('phputf8.strrev');

	$reversed = utf8_strrev($str);

	return $reversed;
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to strspn
|
||||
* Find length of initial segment matching mask
|
||||
*
|
||||
* @param string $str The haystack
|
||||
* @param string $mask The mask
|
||||
* @param integer $start Start optional
|
||||
* @param integer $length Length optional
|
||||
*
|
||||
* @return integer
|
||||
*
|
||||
* @see http://www.php.net/strspn
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function strspn($str, $mask, $start = null, $length = null)
{
	jimport('phputf8.strspn');

	// Forward only as many arguments as the caller actually supplied.
	if ($length !== null)
	{
		return utf8_strspn($str, $mask, $start, $length);
	}

	if ($start !== null)
	{
		return utf8_strspn($str, $mask, $start);
	}

	return utf8_strspn($str, $mask);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware substr_replace
|
||||
* Replace text within a portion of a string
|
||||
*
|
||||
* @param string $str The haystack
|
||||
* @param string $repl The replacement string
|
||||
* @param integer $start Start
|
||||
* @param integer $length Length (optional)
|
||||
*
|
||||
* @return string
|
||||
*
|
||||
* @see http://www.php.net/substr_replace
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function substr_replace($str, $repl, $start, $length = null)
{
	// utf8_substr_replace is loaded by the library loader; no jimport needed here.

	// $length defaults to null, so null must be recognised as "not supplied".
	// The previous check only looked for false, which left the three-argument
	// call unreachable for callers that omit the argument. False is still
	// accepted so existing callers that pass it explicitly are unaffected.
	if ($length === null || $length === false)
	{
		return utf8_substr_replace($str, $repl, $start);
	}

	return utf8_substr_replace($str, $repl, $start, $length);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware replacement for ltrim()
|
||||
*
|
||||
* Strip whitespace (or other characters) from the beginning of a string
|
||||
* You only need to use this if you are supplying the charlist
|
||||
* optional arg and it contains UTF-8 characters. Otherwise ltrim will
|
||||
* work normally on a UTF-8 string
|
||||
*
|
||||
* @param string $str The string to be trimmed
|
||||
* @param string $charlist The optional charlist of additional characters to trim
|
||||
*
|
||||
* @return string The trimmed string
|
||||
*
|
||||
* @see http://www.php.net/ltrim
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function ltrim($str, $charlist = false)
{
	// A charlist that was supplied but is empty leaves nothing to strip.
	// empty() on its own is not a safe test here: it also reports '0' (and 0)
	// as empty, which silently ignored requests to trim zero characters.
	if ($charlist !== false && empty($charlist) && !is_numeric($charlist))
	{
		return $str;
	}

	jimport('phputf8.trim');

	// False is the "no charlist supplied" marker: default whitespace trimming.
	if ($charlist === false)
	{
		return utf8_ltrim($str);
	}

	return utf8_ltrim($str, $charlist);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware replacement for rtrim()
|
||||
* Strip whitespace (or other characters) from the end of a string
|
||||
* You only need to use this if you are supplying the charlist
|
||||
* optional arg and it contains UTF-8 characters. Otherwise rtrim will
|
||||
* work normally on a UTF-8 string
|
||||
*
|
||||
* @param string $str The string to be trimmed
|
||||
* @param string $charlist The optional charlist of additional characters to trim
|
||||
*
|
||||
* @return string The trimmed string
|
||||
*
|
||||
* @see http://www.php.net/rtrim
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function rtrim($str, $charlist = false)
{
	// A charlist that was supplied but is empty leaves nothing to strip.
	// empty() on its own is not a safe test here: it also reports '0' (and 0)
	// as empty, which silently ignored requests to trim zero characters.
	if ($charlist !== false && empty($charlist) && !is_numeric($charlist))
	{
		return $str;
	}

	jimport('phputf8.trim');

	// False is the "no charlist supplied" marker: default whitespace trimming.
	if ($charlist === false)
	{
		return utf8_rtrim($str);
	}

	return utf8_rtrim($str, $charlist);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware replacement for trim()
|
||||
* Strip whitespace (or other characters) from the beginning and end of a string
|
||||
* Note: you only need to use this if you are supplying the charlist
|
||||
* optional arg and it contains UTF-8 characters. Otherwise trim will
|
||||
* work normally on a UTF-8 string
|
||||
*
|
||||
* @param string $str The string to be trimmed
|
||||
* @param string $charlist The optional charlist of additional characters to trim
|
||||
*
|
||||
* @return string The trimmed string
|
||||
*
|
||||
* @see http://www.php.net/trim
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function trim($str, $charlist = false)
{
	// A charlist that was supplied but is empty leaves nothing to strip.
	// empty() on its own is not a safe test here: it also reports '0' (and 0)
	// as empty, which silently ignored requests to trim zero characters.
	if ($charlist !== false && empty($charlist) && !is_numeric($charlist))
	{
		return $str;
	}

	jimport('phputf8.trim');

	// False is the "no charlist supplied" marker: default whitespace trimming.
	if ($charlist === false)
	{
		return utf8_trim($str);
	}

	return utf8_trim($str, $charlist);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to ucfirst
|
||||
* Make a string's first character uppercase or all words' first character uppercase
|
||||
*
|
||||
* @param string $str String to be processed
|
||||
* @param string $delimiter The words delimiter (null means do not split the string)
|
||||
* @param string $newDelimiter The new words delimiter (null means equal to $delimiter)
|
||||
*
|
||||
* @return string If $delimiter is null, return the string with first character as upper case (if applicable)
|
||||
* else consider the string of words separated by the delimiter, apply the ucfirst to each words
|
||||
* and return the string with the new delimiter
|
||||
*
|
||||
* @see http://www.php.net/ucfirst
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function ucfirst($str, $delimiter = null, $newDelimiter = null)
{
	jimport('phputf8.ucfirst');

	// No delimiter: treat the whole string as a single word.
	if ($delimiter === null)
	{
		return utf8_ucfirst($str);
	}

	// Words are re-joined with the original delimiter unless a new one was given.
	$glue = ($newDelimiter === null) ? $delimiter : $newDelimiter;

	$words = explode($delimiter, $str);
	$capitalised = array_map('utf8_ucfirst', $words);

	return implode($glue, $capitalised);
}
|
||||
|
||||
/**
|
||||
* UTF-8 aware alternative to ucwords
|
||||
* Uppercase the first character of each word in a string
|
||||
*
|
||||
* @param string $str String to be processed
|
||||
*
|
||||
* @return string String with first char of each word uppercase
|
||||
*
|
||||
* @see http://www.php.net/ucwords
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function ucwords($str)
{
	// The helper lives in an optional phputf8 file, so load it before use.
	jimport('phputf8.ucwords');

	$capitalised = utf8_ucwords($str);

	return $capitalised;
}
|
||||
|
||||
/**
|
||||
* Transcode a string.
|
||||
*
|
||||
* @param string $source The string to transcode.
|
||||
* @param string $from_encoding The source encoding.
|
||||
* @param string $to_encoding The target encoding.
|
||||
*
|
||||
* @return mixed The transcoded string, or null if the source was not a string.
|
||||
*
|
||||
* @link https://bugs.php.net/bug.php?id=48147
|
||||
*
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function transcode($source, $from_encoding, $to_encoding)
{
	// Only strings are converted; every other input type yields null.
	if (!is_string($source))
	{
		return null;
	}

	if (ICONV_IMPL == 'glibc')
	{
		// glibc gets its own suffix spelling, and its warnings are suppressed.
		return @iconv($from_encoding, $to_encoding . '//TRANSLIT,IGNORE', $source);
	}

	// libiconv and any other implementation share this suffix spelling.
	return iconv($from_encoding, $to_encoding . '//IGNORE//TRANSLIT', $source);
}
|
||||
|
||||
/**
|
||||
* Tests a string as to whether it's valid UTF-8 and supported by the Unicode standard.
|
||||
*
|
||||
* Note: this function has been modified to simply return true or false.
|
||||
*
|
||||
* @param string $str UTF-8 encoded string.
|
||||
*
|
||||
* @return boolean true if valid
|
||||
*
|
||||
* @author <hsivonen@iki.fi>
|
||||
* @see http://hsivonen.iki.fi/php-utf8/
|
||||
* @see compliant
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function valid($str)
{
	// Number of continuation octets still expected before the current
	// multi-octet sequence is complete (0 means "at a character boundary").
	$mState = 0;

	// Code point assembled from the octets of the current sequence so far.
	$mUcs4 = 0;

	// Total number of octets announced by the lead octet of the current sequence.
	$mBytes = 1;

	$len = strlen($str);

	for ($i = 0; $i < $len; $i++)
	{
		// Square-bracket offset: the curly-brace form ($str{$i}) was removed in PHP 8.0.
		$in = ord($str[$i]);

		if ($mState == 0)
		{
			// At a character boundary we expect either a US-ASCII octet or the
			// lead octet of a multi-octet sequence.
			if (0 == (0x80 & $in))
			{
				// US-ASCII, pass straight through.
				$mBytes = 1;
			}
			elseif (0xC0 == (0xE0 & $in))
			{
				// Lead octet of a 2 octet sequence.
				$mUcs4 = ($in & 0x1F) << 6;
				$mState = 1;
				$mBytes = 2;
			}
			elseif (0xE0 == (0xF0 & $in))
			{
				// Lead octet of a 3 octet sequence.
				$mUcs4 = ($in & 0x0F) << 12;
				$mState = 2;
				$mBytes = 3;
			}
			elseif (0xF0 == (0xF8 & $in))
			{
				// Lead octet of a 4 octet sequence.
				$mUcs4 = ($in & 0x07) << 18;
				$mState = 3;
				$mBytes = 4;
			}
			elseif (0xF8 == (0xFC & $in))
			{
				/* Lead octet of a 5 octet sequence.
				 *
				 * This is illegal because the encoded codepoint must be either
				 * (a) not the shortest form or
				 * (b) outside the Unicode range of 0-0x10FFFF.
				 * Rather than trying to resynchronize, carry on until the end
				 * of the sequence and let the check below reject it.
				 */
				$mUcs4 = ($in & 0x03) << 24;
				$mState = 4;
				$mBytes = 5;
			}
			elseif (0xFC == (0xFE & $in))
			{
				// Lead octet of a 6 octet sequence, handled like the 5 octet case.
				$mUcs4 = ($in & 1) << 30;
				$mState = 5;
				$mBytes = 6;
			}
			else
			{
				// Neither US-ASCII nor a legal lead octet.
				return false;
			}

			continue;
		}

		// Inside a sequence only a continuation octet (10xxxxxx) is acceptable.
		if (0x80 != (0xC0 & $in))
		{
			return false;
		}

		// Merge the six payload bits into their position within the code point.
		$shift = ($mState - 1) * 6;
		$mUcs4 |= ($in & 0x0000003F) << $shift;

		if (0 == --$mState)
		{
			// Sequence complete: $mUcs4 now holds the decoded code point.
			// From Unicode 3.1, non-shortest form is illegal
			if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || ((3 == $mBytes) && ($mUcs4 < 0x0800)) || ((4 == $mBytes) && ($mUcs4 < 0x10000))
				|| (4 < $mBytes)
				|| (($mUcs4 & 0xFFFFF800) == 0xD800) // From Unicode 3.2, surrogate characters are illegal
				|| ($mUcs4 > 0x10FFFF)) // Codepoints outside the Unicode range are illegal
			{
				return false;
			}

			// Reset the decoder for the next character.
			$mState = 0;
			$mUcs4 = 0;
			$mBytes = 1;
		}
	}

	// A string that stops in the middle of a multi-octet sequence is not valid
	// UTF-8; previously such truncated input was reported as valid.
	return $mState == 0;
}
|
||||
|
||||
/**
|
||||
* Tests whether a string complies as UTF-8. This will be much
|
||||
* faster than utf8_is_valid but will pass five and six octet
|
||||
* UTF-8 sequences, which are not supported by Unicode and
|
||||
* so cannot be displayed correctly in a browser. In other words
|
||||
* it is not as strict as utf8_is_valid but it's faster. If you use
|
||||
* it to validate user input, you place yourself at the risk that
|
||||
* attackers will be able to inject 5 and 6 byte sequences (which
|
||||
* may or may not be a significant risk, depending on what you are
|
||||
* doing)
|
||||
*
|
||||
* @param string $str UTF-8 string to check
|
||||
*
|
||||
* @return boolean TRUE if string is valid UTF-8
|
||||
*
|
||||
* @see valid
|
||||
* @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function compliant($str)
{
	// An empty string is accepted without consulting the regex engine.
	if (strlen($str) == 0)
	{
		return true;
	}

	/*
	 * With the /u modifier the pattern can only match when the subject is
	 * acceptable to PCRE as UTF-8, so being able to match a single leading
	 * character is used as the compliance test.
	 */
	$matched = preg_match('/^.{1}/us', $str);

	return $matched == 1;
}
|
||||
|
||||
/**
|
||||
* Does a UTF-8 safe version of PHP parse_url function
|
||||
*
|
||||
* @param string $url URL to parse
|
||||
*
|
||||
* @return mixed Associative array or false if badly formed URL.
|
||||
*
|
||||
* @see http://us3.php.net/manual/en/function.parse-url.php
|
||||
* @since 11.1
|
||||
*/
|
||||
public static function parse_url($url)
{
	// Percent-encoded forms of the characters that give a URL its structure,
	// and the literal characters they stand for (same order in both lists).
	$entities = array('%21', '%2A', '%27', '%28', '%29', '%3B', '%3A', '%40', '%26', '%3D', '%24', '%2C', '%2F', '%3F', '%23', '%5B', '%5D');
	$replacements = array('!', '*', "'", "(", ")", ";", ":", "@", "&", "=", "$", ",", "/", "?", "#", "[", "]");

	// Encode everything, then restore the structural characters so that the
	// native parser can still recognise the parts of the URL.
	$encodedURL = str_replace($entities, $replacements, urlencode($url));

	// Parse the encoded URL
	$encodedParts = parse_url($encodedURL);

	// Badly formed URL (or nothing parsed): report failure as before.
	if (!$encodedParts)
	{
		return false;
	}

	// Start from a real array: appending keys to a variable holding false
	// relies on false-to-array conversion, which is deprecated since PHP 8.1.
	$result = array();

	// Undo the encoding on every component.
	foreach ($encodedParts as $key => $value)
	{
		$result[$key] = urldecode(str_replace($replacements, $entities, $value));
	}

	return $result;
}
|
||||
}
|
Reference in New Issue
Block a user