An array of things that should replaced by the separator.
* @param bool $merge Keep the previous (default) array-to-separator array.
*
* @return void
*
* @psalm-param string[] $array
*/
public static function add_array_to_separator(array $array, bool $merge = true)
{
    // Replace mode: the given list becomes the complete pattern list.
    if ($merge !== true) {
        self::$arrayToSeparator = $array;

        return;
    }

    // Merge mode: append to the current list and drop duplicate entries.
    $combined = \array_merge(self::$arrayToSeparator, $array);
    self::$arrayToSeparator = \array_unique($combined);
}
/**
 * Add new characters to the list. `$map` should be a hash whose keys are the
 * characters to search for and whose values are their replacements
 * (downcode() feeds each pair to str_replace()).
 *
 * When `$language` is given and a map is already stored under that key, both
 * maps are combined; for characters present in both, the replacement that is
 * already stored is kept. Without `$language` the map is stored under a
 * freshly generated unique key.
 *
 * @param array       $map
 * @param string|null $language
 *
 * @return void
 *
 * @psalm-param array $map
 */
public static function add_chars(array $map, string $language = null)
{
    $language_key = $language ?? \uniqid('urlify', true);

    if (!isset(self::$maps[$language_key])) {
        self::$maps[$language_key] = $map;

        return;
    }

    // array_replace() instead of array_merge(): same order and precedence for
    // string keys, but integer-like keys (e.g. the character "1") are kept
    // instead of being renumbered to 0, 1, 2, ...
    self::$maps[$language_key] = \array_replace($map, self::$maps[$language_key]);
}
/**
 * Drop every character map that was registered via add_chars().
 *
 * @return void
 */
public static function reset_chars()
{
    self::$maps = [];
}
/**
 * Transliterates characters to their ASCII equivalents.
 *
 * The string is first expanded (currencies and symbols), then every map
 * registered via add_chars() is applied, and finally the result is handed to
 * the ASCII helper for conversion and transliteration.
 *
 * $language specifies a priority for a specific language, which is useful
 * if languages have different rules for the same character.
 *
 * @param string $string   The input string.
 * @param string $language Your primary language.
 * @param string $unknown  Passed to the transliteration step as the replacement
 *                         for unknown characters (default is an empty string).
 *
 * @return string
 */
public static function downcode(
    string $string,
    string $language = 'en',
    string $unknown = ''
): string {
    $expanded = self::expandString($string, $language);

    // Apply the custom maps one search/replace pair at a time, in
    // registration order.
    foreach (self::$maps as $customMap) {
        foreach ($customMap as $search => $replacement) {
            $expanded = \str_replace($search, $replacement, $expanded);
        }
    }

    $ascii = \voku\helper\ASCII::to_ascii($expanded, $language, false, true);

    return \voku\helper\ASCII::to_transliterate($ascii, $unknown, false);
}
/**
 * Convert a string into a URL-friendly slug.
 *
 * e.g.: "Petty theft" is turned into "petty-theft" with the default
 * arguments (assuming the default separator patterns).
 *
 * Processing order: separator patterns -> strip_tags() -> downcode() ->
 * separator patterns again -> removal of unwanted characters / words ->
 * optional length cut -> optional lower-casing -> trimming of the separator.
 *
 * @param string      $string      The text you want to convert.
 * @param int         $maxLength   Max. length of the output string; "0" (zero) or a
 *                                 negative value disables the cut.
 * @param string      $language    The language you want to convert to; an empty
 *                                 string falls back to "en".
 * @param bool        $fileName    Keep "." characters, e.g. the one in front of a
 *                                 file extension ("image.jpg").
 * @param bool        $removeWords Remove some "words" from the string.
 *                                 Info: Set extra words via remove_words().
 * @param bool        $strToLower  Use strtolower() at the end.
 * @param bool|string $separator   Define a new separator for the words. `false`
 *                                 selects "_", `true` or an empty string selects "-".
 *
 * @return string
 */
public static function filter(
    string $string,
    int $maxLength = 200,
    string $language = 'en',
    bool $fileName = false,
    bool $removeWords = false,
    bool $strToLower = true,
    $separator = '-'
): string {
    if ($string === '') {
        return '';
    }

    // fallback
    if ($language === '') {
        $language = 'en';
    }

    // separator-fallback
    if ($separator === false) {
        $separator = '_';
    }
    if ($separator === true || $separator === '') {
        $separator = '-';
    }

    // escaped separator, usable inside "/"-delimited patterns
    $separatorEscaped = \preg_quote($separator, '/');

    // Character class that matches the separator. The comparison is strict on
    // purpose: a "0" separator is falsy and must not fall back to " ".
    $separatorClass = '[' . ($separatorEscaped !== '' ? $separatorEscaped : ' ') . ']';

    // use defaults, if there are no values
    if (self::$arrayToSeparator === []) {
        self::reset_array_to_separator();
    }

    // remove apostrophes which are not used as quotes around a string
    if (\strpos($string, "'") !== false) {
        $stringTmp = \preg_replace("/(\w)'(\w)/u", '${1}${2}', $string);
        if ($stringTmp !== null) {
            $string = (string) $stringTmp;
        }
    }

    // replace with $separator
    $string = (string) \preg_replace(
        self::$arrayToSeparator,
        $separator,
        $string
    );

    // remove all other html-tags
    if (
        \strpos($string, '<') !== false
        ||
        \strpos($string, '>') !== false
    ) {
        $string = \strip_tags($string);
    }

    // use special language replacer
    $string = self::downcode($string, $language);

    // replace with $separator, again (downcode() may have produced new matches)
    $string = (string) \preg_replace(
        self::$arrayToSeparator,
        $separator,
        $string
    );

    // remove all these words from the string before urlifying;
    // the empty pattern "//" is a no-op when nothing has to be removed
    $removeWordsSearch = '//';
    if ($removeWords === true) {
        $removeList = self::get_remove_list($language);
        if ($removeList !== []) {
            // NOTE(review): "\b" treats "_" as a word character, so with an
            // "_" separator the words may not be matched here - verify.
            $removeWordsSearch = '/\b(?:' . \implode('|', $removeList) . ')\b/ui';
        }
    }

    // keep the "." from e.g.: a file-extension?
    $removePatternAddOn = $fileName ? '.' : '';

    $string = (string) \preg_replace(
        [
            // 1) remove un-needed chars
            '/[^' . $separatorEscaped . $removePatternAddOn . '\-a-zA-Z0-9\s]/u',
            // 2) convert spaces to $separator
            '/[\s]+/u',
            // 3) remove some extras words
            $removeWordsSearch,
            // 4) remove double $separator's
            '/' . $separatorClass . '+/u',
            // 5) remove $separator at the end
            '/' . $separatorClass . '+$/u',
        ],
        [
            '',
            $separator,
            '',
            $separator,
            '',
        ],
        $string
    );

    // "substr" only if "$length" is set
    if ($maxLength > 0 && \strlen($string) > $maxLength) {
        $string = (string) \substr(\trim($string, $separator), 0, $maxLength);
    }

    // convert to lowercase
    if ($strToLower === true) {
        $string = \strtolower($string);
    }

    // trim "$separator" from beginning and end of the string
    return \trim($string, $separator);
}
/**
 * Register words that filter() removes when its $removeWords flag is set.
 * Accepts either a single word or an array of words; every word is escaped
 * with preg_quote() before it is stored.
 *
 * @param string|string[] $words
 * @param string          $language
 * @param bool            $merge    Keep the previous (default) remove-words array.
 *
 * @return void
 */
public static function remove_words($words, string $language = 'en', bool $merge = true)
{
    $quoted = [];
    foreach (\is_array($words) ? $words : [$words] as $key => $word) {
        $quoted[$key] = \preg_quote($word, '/');
    }

    // Replace mode: the given words become the whole list for this language.
    if ($merge !== true) {
        self::$remove_list[$language] = $quoted;

        return;
    }

    // Merge mode: extend the current list and drop duplicates.
    $known = self::get_remove_list($language);
    self::$remove_list[$language] = \array_unique(\array_merge($known, $quoted));
}
/**
 * Reset the internal "self::$arrayToSeparator" to the default values.
 *
 * The defaults are regular expressions; filter() replaces every match with
 * the separator:
 *  1) the HTML entities for ", &, <, >, en dash and em dash
 *  2) dash-like and quote-like characters
 *  3) line breaks, either as CR / LF characters or as an HTML "br" tag
 *
 * @return void
 */
public static function reset_array_to_separator()
{
    self::$arrayToSeparator = [
        // Entity names only: raw "<", ">" and "&" must survive this step so
        // that strip_tags() and the symbol expansion can still see them.
        '/&quot;|&amp;|&lt;|&gt;|&ndash;|&mdash;/i', // ", &, <, >, –, —
        '/⁻|-|—|_|"|`|´|\'/',
        // No empty alternative here: it would match at every position and put
        // a separator between all characters.
        // NOTE(review): the first alternative starts with a literal "/" -
        // kept as found, confirm that it is intended.
        "#/\r\n|\r|\n|<br.*/?>#isU",
    ];
}
/**
 * Reset the word-remove-array of one language to the stop words supplied by
 * the StopWords helper.
 *
 * The list is stored under the language code exactly as it was passed in,
 * while the lookup uses the normalised code. Nothing happens when either of
 * them is an empty string; an unknown language results in an empty list.
 *
 * @param string $language
 *
 * @return void
 */
public static function reset_remove_list(string $language = 'en')
{
    if ($language === '') {
        return;
    }

    $lookupLanguage = self::get_language_for_reset_remove_list($language);
    if ($lookupLanguage === '') {
        return;
    }

    $stopWords = new \voku\helper\StopWords();

    try {
        $list = $stopWords->getStopWordsFromLanguage($lookupLanguage);
    } catch (\voku\helper\StopWordsLanguageNotExists $e) {
        // No stop words available for this language.
        $list = [];
    }

    self::$remove_list[$language] = $list;
}
/**
 * Alias of `URLify::downcode()`, called with its default `$unknown` value.
 *
 * @param string $string   The input string.
 * @param string $language Your primary language.
 *
 * @return string
 */
public static function transliterate(string $string, string $language = 'en'): string
{
    return self::downcode($string, $language);
}
/**
 * Expands the given string, replacing some special parts by words:
 * currency amounts first (expandCurrencies()), then single symbols such as
 * "@" or "&" (expandSymbols()).
 *
 * Most of these transformations have been inspired by the pelle/slugger
 * project, distributed under the Eclipse Public License.
 * Copyright 2012 Pelle Braendgaard
 *
 * @param string $string   The string to expand
 * @param string $language
 *
 * @return string The result of expanding the string
 */
protected static function expandString(string $string, string $language = 'en'): string
{
    return self::expandSymbols(
        self::expandCurrencies($string, $language),
        $language
    );
}
/**
 * Normalise a language / locale code for the stop-word lookup.
 *
 * Codes without "_" or "-" are only lower-cased. For codes that contain one
 * of these characters (e.g. "en_US", "de-DE") everything after the first
 * pair of letters is dropped before lower-casing.
 *
 * @param string $language
 *
 * @return string The normalised code, or an empty string for empty input.
 */
private static function get_language_for_reset_remove_list(string $language)
{
    if ($language === '') {
        return '';
    }

    $isLocale = \strpos($language, '_') !== false
        || \strpos($language, '-') !== false;

    if ($isLocale) {
        // A plain capturing group: "$1" is the two-letter language part and
        // the trailing ".*" swallows the region / script suffix.
        $language = (string) \preg_replace('/([a-z]{2}).*/i', '$1', $language);
    }

    return \strtolower($language);
}
/**
 * Expands the numeric currencies in euros, dollars, pounds
 * and yens that the given string may include.
 *
 * An amount is only expanded when it is delimited by whitespace or by the
 * start / end of the string. For "de" the German wording is used, every
 * other language gets the English wording. Strings without any of the
 * symbols "€", "$", "£", "¥" are returned unchanged.
 *
 * @param string $string
 * @param string $language
 *
 * @return string
 */
private static function expandCurrencies(string $string, string $language = 'en')
{
    if (
        \strpos($string, '€') === false
        &&
        \strpos($string, '$') === false
        &&
        \strpos($string, '£') === false
        &&
        \strpos($string, '¥') === false
    ) {
        return $string;
    }

    // Optional gap between amount and symbol: plain spaces, UTF-8 encoded
    // non-breaking spaces (U+00A0) or the literal "&nbsp;" entity.
    $gap = '(?: |\xC2\xA0|&nbsp;)*';

    if ($language === 'de') {
        return (string) \preg_replace(
            [
                '/(?:\s|^)(\d+)' . $gap . '€(?:\s|$)/',
                '/(?:\s|^)\$' . $gap . '(\d+)(?:\s|$)/',
                '/(?:\s|^)£' . $gap . '(\d+)(?:\s|$)/',
                '/(?:\s|^)¥' . $gap . '(\d+)(?:\s|$)/',
                '/(?:\s|^)(\d+)[.|,](\d+)' . $gap . '€(?:\s|$)/',
                '/(?:\s|^)\$' . $gap . '(\d+)[.|,](\d+)(?:\s|$)/',
                '/(?:\s|^)£' . $gap . '(\d+)[.|,](\d+)(?:\s|$)/',
            ],
            [
                ' \1 Euro ',
                ' \1 Dollar ',
                ' \1 Pound ',
                ' \1 Yen ',
                ' \1 Euro \2 Cent ',
                ' \1 Dollar \2 Cent ',
                ' \1 Pound \2 Pence ',
            ],
            $string
        );
    }

    return (string) \preg_replace(
        [
            '/(?:\s|^)1' . $gap . '€(?:\s|$)/',
            '/(?:\s|^)(\d+)' . $gap . '€(?:\s|$)/',
            '/(?:\s|^)\$' . $gap . '1(?:\s|$)/',
            '/(?:\s|^)\$' . $gap . '(\d+)(?:\s|$)/',
            '/(?:\s|^)£' . $gap . '1(?:\s|$)/',
            '/(?:\s|^)£' . $gap . '(\d+)(?:\s|$)/',
            '/(?:\s|^)¥' . $gap . '(\d+)(?:\s|$)/',
            '/(?:\s|^)1[.|,](\d+)' . $gap . '€(?:\s|$)/',
            '/(?:\s|^)(\d+)[.|,](\d+)' . $gap . '€(?:\s|$)/',
            // The dollar sign has to be escaped: a bare "$" is the
            // end-of-subject anchor and would turn a trailing "1.5" into dollars.
            '/(?:\s|^)1[.|,](\d+)' . $gap . '\$(?:\s|$)/',
            '/(?:\s|^)\$' . $gap . '(\d+)[.|,](\d+)(?:\s|$)/',
            '/(?:\s|^)1[.|,](\d+)' . $gap . '£(?:\s|$)/',
            '/(?:\s|^)£' . $gap . '(\d+)[.|,](\d+)(?:\s|$)/',
        ],
        [
            ' 1 Euro ',
            ' \1 Euros ',
            ' 1 Dollar ',
            ' \1 Dollars ',
            ' 1 Pound ',
            ' \1 Pounds ',
            ' \1 Yen ',
            ' 1 Euros \1 Cents ',
            ' \1 Euros \2 Cents ',
            ' 1 Dollars \1 Cents ',
            ' \1 Dollars \2 Cents ',
            ' 1 Pounds \1 Pence ',
            ' \1 Pounds \2 Pence ',
        ],
        $string
    );
}
/**
 * Expands the special symbols that the given string may include:
 * '©', '®', '@', '&', '%' and '='.
 *
 * Each symbol is replaced together with the whitespace around it. The
 * replacement texts come from the ASCII helper's character tables; '&', '%'
 * and '=' are looked up per language and stay as they are when the table
 * has no entry for them.
 *
 * @param string $string
 * @param string $language
 *
 * @return string
 */
private static function expandSymbols(string $string, string $language = 'en')
{
    // Cheap pre-check: leave early when none of the symbols is present.
    $hasSymbol = false;
    foreach (['©', '®', '@', '&', '%', '='] as $symbol) {
        if (\strpos($string, $symbol) !== false) {
            $hasSymbol = true;

            break;
        }
    }
    if ($hasSymbol === false) {
        return $string;
    }

    $maps = \voku\helper\ASCII::charsArray(true);

    // pattern => replacement, applied in this order
    $replacements = [
        '/\s*©\s*/'   => $maps['latin_symbols']['©'],
        '/\s*®\s*/'   => $maps['latin_symbols']['®'],
        '/\s*@\s*/'   => $maps['latin_symbols']['@'],
        '/\s*&\s*/'   => $maps[$language]['&'] ?? '&',
        '/\s*%\s*/'   => $maps[$language]['%'] ?? '%',
        '/(\s*=\s*)/' => $maps[$language]['='] ?? '=',
    ];

    return (string) \preg_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $string
    );
}
/**
 * Return the "self::$remove_list[$language]" array.
 *
 * A list that has not been set yet is initialised through
 * reset_remove_list() first. An empty language code, a missing list or an
 * empty list all yield an empty array.
 *
 * @param string $language
 *
 * @return array
 */
private static function get_remove_list(string $language = 'en')
{
    // check for language
    if ($language === '') {
        return [];
    }

    // lazily load the defaults for this language
    if (isset(self::$remove_list[$language]) === false) {
        self::reset_remove_list($language);
    }

    // empty() covers both "still not set" and "set but empty"
    return empty(self::$remove_list[$language])
        ? []
        : self::$remove_list[$language];
}
}