streams/Code/Lib/LanguageDetect.php

52 lines
1.5 KiB
PHP
Raw Normal View History

2022-07-05 01:34:07 +00:00
<?php
namespace Code\Lib;
require_once('library/text_languagedetect/Text/LanguageDetect.php');
use Text_LanguageDetect;
2022-08-20 23:11:42 +00:00
use Text_LanguageDetect_Exception;
2022-07-05 01:34:07 +00:00
/**
 * Thin wrapper around the PEAR Text_LanguageDetect library that guesses the
 * language of a piece of text.
 *
 * @see http://pear.php.net/package/Text_LanguageDetect
 *
 * @TODO: The PEAR library is no longer being maintained and has had recent issues loading with composer (2020-06-29).
 * This project: https://github.com/patrickschur/language-detection *may* be useful as a replacement.
 */
class LanguageDetect
{
    /** Default minimum input length (in characters) below which no detection is attempted. */
    public const MINLENGTH = 48;

    /** Default minimum confidence a detection result must reach to be returned. */
    public const MINCONFIDENCE = 0.01;

    /**
     * Detect language from provided string.
     * When successful, return the 2-letter language code.
     * Lack of confidence in the result returns empty string.
     *
     * @param string $string     A string to examine
     * @param int    $minlength  Strings shorter than this (measured with mb_strlen) are not examined
     * @param float  $confidence Minimum confidence the detector must report for the result to be used
     * @return string Language code in 2-letter ISO 639-1 (en, de, fr) format, or '' when the
     *                string is too short, the detector is not confident enough, or the
     *                detector raised a Text_LanguageDetect_Exception (which is logged)
     */
    public function detect(
        string $string,
        int $minlength = self::MINLENGTH,
        float $confidence = self::MINCONFIDENCE
    ): string {
        // Bail out before touching the detector at all: short input never needs it.
        if (mb_strlen($string) < $minlength) {
            return '';
        }

        try {
            // Constructed inside the try so that a library exception raised while
            // setting up the detector is logged and reported as "no result",
            // the same as a failure during detection itself.
            $detector = new Text_LanguageDetect();

            // return 2-letter ISO 639-1 language code (e.g. 'en')
            $detector->setNameMode(2);

            $result = $detector->detectConfidence($string);

            // isset() also covers a null/absent result, in which case we fall through to ''.
            if (isset($result['language'], $result['confidence']) && $result['confidence'] >= $confidence) {
                return $result['language'];
            }
        } catch (Text_LanguageDetect_Exception $e) {
            logger('LanguageDetect Exception: ' . $e->getMessage());
        }

        return '';
    }
}