Use ISO-639-1 for the language detection

This commit is contained in:
Michael 2023-11-02 22:49:25 +00:00
parent 74b4eddcf7
commit 3f2b0b9422
7 changed files with 113 additions and 114 deletions

View file

@ -2034,15 +2034,12 @@ class Item
return [];
}
$availableLanguages = DI::l10n()->getAvailableLanguages(true);
$availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages);
$ld = new Language(array_keys($availableLanguages));
$ld = new Language(DI::l10n()->getDetectableLanguages());
$result = [];
foreach (self::splitByBlocks($searchtext) as $block) {
$languages = $ld->detect($block)->limit(0, $count)->close() ?: [];
$languages = $ld->detect($block)->close() ?: [];
$data = [
'text' => $block,
@ -2057,10 +2054,32 @@ class Item
}
}
arsort($result);
$result = array_slice($result, 0, $count);
$result = self::compactLanguages($result);
return $result;
arsort($result);
return array_slice($result, 0, $count);
}
/**
* Convert the language codes in the detection result to ISO 639-1.
* On duplicates the system uses the higher quality value.
*
* @param array $result
* @return array
*/
private static function compactLanguages(array $result): array
{
    $compacted = [];

    foreach ($result as $language => $quality) {
        // Only entries with a non-zero quality are carried over.
        if ($quality != 0) {
            $iso = DI::l10n()->toISO6391($language);

            // Quality already recorded for this converted code, if any.
            $known = $compacted[$iso] ?? null;

            // Keep the new value when nothing usable is stored yet, or when
            // it beats the stored one, so duplicates resolve to the best score.
            if (empty($known) || ($known < $quality)) {
                $compacted[$iso] = $quality;
            }
        }
    }

    return $compacted;
}
/**