mirror of
https://codeberg.org/streams/streams.git
synced 2024-09-19 15:55:13 +00:00
fix tests and update languagedetect library (discovered in tests)
This commit is contained in:
parent
3a15a5b0a2
commit
a524b5e86b
20 changed files with 2365 additions and 19 deletions
|
@ -38,7 +38,7 @@ class Config
|
||||||
|
|
||||||
if (! array_key_exists('config_loaded', App::$config[$family])) {
|
if (! array_key_exists('config_loaded', App::$config[$family])) {
|
||||||
$r = q("SELECT * FROM config WHERE cat = '%s'", dbesc($family));
|
$r = q("SELECT * FROM config WHERE cat = '%s'", dbesc($family));
|
||||||
if ($r === false && !App::$install) {
|
if ($r === false && !App::$install && !defined('\UNIT_TESTING')) {
|
||||||
sleep(3);
|
sleep(3);
|
||||||
$recursionCounter ++;
|
$recursionCounter ++;
|
||||||
if ($recursionCounter > 10) {
|
if ($recursionCounter > 10) {
|
||||||
|
|
4
boot.php
4
boot.php
|
@ -89,9 +89,9 @@ function sys_boot() {
|
||||||
* Try to open the database;
|
* Try to open the database;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (! App::$install) {
|
if (!App::$install) {
|
||||||
DBA::dba_factory($db_host, $db_port, $db_user, $db_pass, $db_data, $db_type, App::$install);
|
DBA::dba_factory($db_host, $db_port, $db_user, $db_pass, $db_data, $db_type, App::$install);
|
||||||
if (! DBA::$dba->connected && !defined('UNIT_TEST')) {
|
if (!DBA::$dba->connected && !defined('\UNIT_TESTING')) {
|
||||||
system_unavailable();
|
system_unavailable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -719,10 +719,15 @@ function logger($msg, $level = LOGGER_NORMAL, $priority = LOG_INFO)
|
||||||
$debugging = true;
|
$debugging = true;
|
||||||
$logfile = 'install.log';
|
$logfile = 'install.log';
|
||||||
$loglevel = LOGGER_ALL;
|
$loglevel = LOGGER_ALL;
|
||||||
} else {
|
} elseif (DBA::$dba && DBA::$dba->connected) {
|
||||||
$debugging = get_config('system', 'debugging');
|
$debugging = get_config('system', 'debugging');
|
||||||
$loglevel = intval(get_config('system', 'loglevel'));
|
$loglevel = intval(get_config('system', 'loglevel'));
|
||||||
$logfile = get_config('system', 'logfile');
|
$logfile = get_config('system', 'logfile');
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$debugging = false;
|
||||||
|
$logfile = null;
|
||||||
|
$loglevel = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((! $debugging) || (! $logfile) || ($level > $loglevel)) {
|
if ((! $debugging) || (! $logfile) || ($level > $loglevel)) {
|
||||||
|
|
|
@ -155,3 +155,12 @@ Unit test status
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master
|
.. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master
|
||||||
:target: https://travis-ci.org/pear/Text_LanguageDetect
|
:target: https://travis-ci.org/pear/Text_LanguageDetect
|
||||||
|
|
||||||
|
|
||||||
|
Notes
|
||||||
|
=====
|
||||||
|
Where are the data from?
|
||||||
|
|
||||||
|
I don't recall where I got the original data set.
|
||||||
|
It's just the frequencies of 3-letter combinations in each supported language.
|
||||||
|
It could be generated from a few random Wikipedia pages in each language.
|
||||||
|
|
|
@ -16,6 +16,7 @@ require_once 'library/text_languagedetect/Text/LanguageDetect/Exception.php';
|
||||||
require_once 'library/text_languagedetect/Text/LanguageDetect/Parser.php';
|
require_once 'library/text_languagedetect/Text/LanguageDetect/Parser.php';
|
||||||
require_once 'library/text_languagedetect/Text/LanguageDetect/ISO639.php';
|
require_once 'library/text_languagedetect/Text/LanguageDetect/ISO639.php';
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Detects the language of a given piece of text.
|
* Detects the language of a given piece of text.
|
||||||
*
|
*
|
||||||
|
@ -189,7 +190,7 @@ class Text_LanguageDetect
|
||||||
*/
|
*/
|
||||||
protected function _get_data_loc($fname)
|
protected function _get_data_loc($fname)
|
||||||
{
|
{
|
||||||
if (substr($fname,0,1) == '/' || substr($fname,0,1) == '.') {
|
if ($fname[0] == '/' || $fname[0] == '.') {
|
||||||
// if filename starts with a slash, assume it's an absolute pathname
|
// if filename starts with a slash, assume it's an absolute pathname
|
||||||
// and skip whatever is in $this->_data_dir
|
// and skip whatever is in $this->_data_dir
|
||||||
return $fname;
|
return $fname;
|
||||||
|
@ -247,12 +248,6 @@ class Text_LanguageDetect
|
||||||
protected function _checkTrigram($trigram)
|
protected function _checkTrigram($trigram)
|
||||||
{
|
{
|
||||||
if (!is_array($trigram)) {
|
if (!is_array($trigram)) {
|
||||||
if (ini_get('magic_quotes_runtime')) {
|
|
||||||
throw new Text_LanguageDetect_Exception(
|
|
||||||
'Error loading database. Try turning magic_quotes_runtime off.',
|
|
||||||
Text_LanguageDetect_Exception::MAGIC_QUOTES
|
|
||||||
);
|
|
||||||
}
|
|
||||||
throw new Text_LanguageDetect_Exception(
|
throw new Text_LanguageDetect_Exception(
|
||||||
'Language database is not an array.',
|
'Language database is not an array.',
|
||||||
Text_LanguageDetect_Exception::DB_NOT_ARRAY
|
Text_LanguageDetect_Exception::DB_NOT_ARRAY
|
||||||
|
|
|
@ -44,7 +44,7 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
||||||
*
|
*
|
||||||
* @var string
|
* @var string
|
||||||
*/
|
*/
|
||||||
protected $_trigrams = array();
|
protected $_trigram = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores the trigram ranks of the sample
|
* Stores the trigram ranks of the sample
|
||||||
|
|
|
@ -27,6 +27,6 @@
|
||||||
"ext-mbstring": "May require the mbstring PHP extension"
|
"ext-mbstring": "May require the mbstring PHP extension"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"phpunit/phpunit": "*"
|
"phpunit/phpunit": "8.*|9.*"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
18
library/text_languagedetect/docs/confidence.php
Normal file
18
library/text_languagedetect/docs/confidence.php
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
<?php
/**
 * Prints the three best-matching languages for a sample sentence, one per
 * line, each followed by its confidence score with two decimals.
 */
require_once 'Text/LanguageDetect.php';

$sample = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';

$detector = new Text_LanguageDetect();

// Ask for the 3 most probable languages; the result is iterated below as
// language name => confidence.
$candidates = $detector->detect($sample, 3);

foreach ($candidates as $name => $score) {
    $formatted = number_format($score, 2);
    echo $name . ': ' . $formatted . "\n";
}

// Expected output:
// german: 0.35
// dutch: 0.25
// swedish: 0.20
?>
|
15
library/text_languagedetect/docs/errorhandling.php
Normal file
15
library/text_languagedetect/docs/errorhandling.php
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
<?php
/**
 * How to handle errors
 *
 * Wraps detector construction and detection in a try/catch for
 * Text_LanguageDetect_Exception and reports the failure on standard output.
 */
require_once 'Text/LanguageDetect.php';
require_once 'Text/LanguageDetect/Exception.php';

try {
    $ld = new Text_LanguageDetect();
    $lang = $ld->detectSimple('Das ist ein kleiner Text');
    echo "Language is: $lang\n";
} catch (Text_LanguageDetect_Exception $e) {
    // Print only the human-readable message: concatenating the exception
    // object itself would also dump its class name and full stack trace.
    echo 'An error occurred! Message: ' . $e->getMessage() . "\n";
}
|
35
library/text_languagedetect/docs/example_clui.php
Normal file
35
library/text_languagedetect/docs/example_clui.php
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
<?php

/**
 * example usage (CLI)
 *
 * Prints the list of supported languages, then reads standard input line by
 * line and, for every line, prints the detection result (up to 4 candidates)
 * and the Unicode blocks found in it.
 *
 * @package Text_LanguageDetect
 * @version CVS: $Id$
 */

require_once 'Text/LanguageDetect.php';

$l = new Text_LanguageDetect;

$stdin = fopen('php://stdin', 'r');

echo "Supported languages:\n";
$langs = $l->getLanguages();
sort($langs);
echo join(', ', $langs);

echo "\ntotal ", count($langs), "\n\n";

// Compare against false explicitly: a final input line consisting only of
// "0" (without a trailing newline) is falsy and would otherwise end the
// loop before that line is processed.
while (($line = fgets($stdin)) !== false) {
    $result = $l->detect($line, 4);
    print_r($result);
    $blocks = $l->detectUnicodeBlocks($line, true);
    print_r($blocks);
}

fclose($stdin);
unset($l);

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
|
72
library/text_languagedetect/docs/example_web.php
Normal file
72
library/text_languagedetect/docs/example_web.php
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
<?php

/**
 * example usage (web)
 *
 * Renders a small form; the submitted text is run through language and
 * Unicode-block detection and the results are printed below the form.
 *
 * @package Text_LanguageDetect
 * @version CVS: $Id$
 */

// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
header('Content-type: text/html; charset=utf-8', true);

require_once 'Text/LanguageDetect.php';

$l = new Text_LanguageDetect;

// Always define $q so the template never reads an undefined variable.
// The value is taken verbatim: magic quotes no longer exist, so calling
// stripslashes() here would only corrupt legitimate backslashes.
$q = (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) ? $_REQUEST['q'] : '';

?>
<html>
<head>
<title>Text_LanguageDetect demonstration</title>
</head>
<body>
<h2>Text_LanguageDetect</h2>
<?php
// Full "<?php" tags are used throughout: the short "<?" form only works
// when short_open_tag is enabled.
echo "<small>Supported languages:\n";
$langs = $l->getLanguages();
sort($langs);
foreach ($langs as $lang) {
    echo ucfirst($lang), ', ';
}

// Count directly instead of incrementing an uninitialised loop counter.
$i = count($langs);
echo "<br />total $i</small><br /><br />";

?>
<form method="post">
Enter text to identify language (at least a couple of sentences):<br />
<textarea name="q" wrap="virtual" cols="80" rows="8"><?= htmlspecialchars($q, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?></textarea>
<br />
<input type="submit" value="Submit" />
</form>
<?php
if (strlen($q)) {
    $len = $l->utf8strlen($q);
    if ($len < 20) { // this value picked somewhat arbitrarily
        echo "Warning: string not very long ($len chars)<br />\n";
    }

    $result = $l->detectConfidence($q);

    if ($result == null) {
        echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
    } else {
        echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
    }

    $result = $l->detectUnicodeBlocks($q, false);
    if (!empty($result)) {
        arsort($result);
        echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";
    }
}

unset($l);

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

?>
</body></html>
|
19
library/text_languagedetect/docs/iso.php
Normal file
19
library/text_languagedetect/docs/iso.php
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
<?php
/**
 * Demonstrates how to use ISO language codes.
 *
 * The "name mode" changes the way languages are accepted and returned.
 */
require_once 'Text/LanguageDetect.php';

$detector = new Text_LanguageDetect();
$sample   = 'Das ist ein kleiner Text';

// Name mode 2 prints the ISO 639-1 two-letter code ("de"),
// name mode 3 prints the ISO 639-2 three-letter code ("deu").
foreach (array(2, 3) as $nameMode) {
    $detector->setNameMode($nameMode);
    echo $detector->detectSimple($sample) . "\n";
}
?>
|
11
library/text_languagedetect/docs/languages.php
Normal file
11
library/text_languagedetect/docs/languages.php
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
<?php
/**
 * List all supported languages
 *
 * Each language name returned by the detector is written on its own line.
 */
require_once 'Text/LanguageDetect.php';

$detector = new Text_LanguageDetect();
$names    = $detector->getLanguages();

foreach ($names as $name) {
    print $name . "\n";
}
?>
|
10
library/text_languagedetect/docs/simple.php
Normal file
10
library/text_languagedetect/docs/simple.php
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<?php
/**
 * Minimal example: detect the single most likely language of a sentence and
 * dump the returned value.
 */
require_once 'Text/LanguageDetect.php';

$sample = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';

$detector = new Text_LanguageDetect();
$language = $detector->detectSimple($sample);

var_dump($language);
// Expected output: german
?>
|
10
library/text_languagedetect/phpcs.xml
Normal file
10
library/text_languagedetect/phpcs.xml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<ruleset name="PEAR-textlanguagedetect">
|
||||||
|
<rule ref="PEAR">
|
||||||
|
<!-- we keep the old php4-style variable names for now -->
|
||||||
|
<exclude name="PEAR.NamingConventions.ValidFunctionName.PublicUnderscore"/>
|
||||||
|
<exclude name="PEAR.NamingConventions.ValidVariableName.PublicUnderscore"/>
|
||||||
|
<!-- we keep the method names for BC reasons -->
|
||||||
|
<exclude name="PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps"/>
|
||||||
|
</rule>
|
||||||
|
</ruleset>
|
42
library/text_languagedetect/tests/PrivProxy.php
Normal file
42
library/text_languagedetect/tests/PrivProxy.php
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
<?php
/**
 * Helper that enables access to private and protected methods and properties.
 *
 * Wraps an object and forwards method calls and property reads/writes to it
 * through reflection, so that tests can reach its non-public members.
 */
class PrivProxy
{
    /**
     * The wrapped object every call and property access is forwarded to.
     *
     * @var object
     */
    private $obj;

    /**
     * @param object $obj Object whose non-public members should be reachable
     */
    public function __construct($obj)
    {
        $this->obj = $obj;
    }

    /**
     * Invokes the named method on the wrapped object, regardless of its
     * visibility.
     *
     * @param string $method    Method name
     * @param array  $arguments Arguments passed on to the method
     *
     * @return mixed Whatever the wrapped method returns
     *
     * @throws ReflectionException If the wrapped object has no such method
     */
    public function __call($method, $arguments)
    {
        $rm = new ReflectionMethod($this->obj, $method);
        self::unlock($rm);
        return $rm->invokeArgs($this->obj, $arguments);
    }

    /**
     * Static calls cannot be proxied: a proxy is bound to one wrapped
     * instance, and no instance exists in a static context. Fail with an
     * explicit message instead of the engine's "Using $this when not in
     * object context" error.
     *
     * @param string $method    Method name
     * @param array  $arguments Arguments of the attempted call (unused)
     *
     * @return void
     *
     * @throws BadMethodCallException Always
     */
    public static function __callStatic($method, $arguments)
    {
        throw new BadMethodCallException(
            'PrivProxy cannot forward static call "' . $method
            . '": create a proxy instance and call the method on it instead.'
        );
    }

    /**
     * Writes a property of the wrapped object, regardless of its visibility.
     *
     * @param string $var   Property name
     * @param mixed  $value New value
     *
     * @return void
     *
     * @throws ReflectionException If the wrapped object has no such property
     */
    public function __set($var, $value)
    {
        $rp = new ReflectionProperty($this->obj, $var);
        self::unlock($rp);
        $rp->setValue($this->obj, $value);
    }

    /**
     * Reads a property of the wrapped object, regardless of its visibility.
     *
     * @param string $var Property name
     *
     * @return mixed Current value of the property
     *
     * @throws ReflectionException If the wrapped object has no such property
     */
    public function __get($var)
    {
        $rp = new ReflectionProperty($this->obj, $var);
        self::unlock($rp);
        return $rp->getValue($this->obj);
    }

    /**
     * Makes a reflected member accessible on PHP versions that require it.
     *
     * setAccessible() has no effect as of PHP 8.1, so it is only called on
     * older versions.
     *
     * @param ReflectionMethod|ReflectionProperty $member Reflected member
     *
     * @return void
     */
    private static function unlock($member)
    {
        if (PHP_VERSION_ID < 80100) {
            $member->setAccessible(true);
        }
    }
}
|
2036
library/text_languagedetect/tests/Text_LanguageDetectTest.php
Normal file
2036
library/text_languagedetect/tests/Text_LanguageDetectTest.php
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,72 @@
|
||||||
|
<?php
// Make the library root resolvable so the relative require below works.
set_include_path(__DIR__ . '/../' . PATH_SEPARATOR . get_include_path());

require_once 'Text/LanguageDetect/ISO639.php';

/**
 * Exercises the static lookups of Text_LanguageDetect_ISO639 in both
 * directions (language name to code, code to language name), each with one
 * known value and one unknown value that is expected to yield null.
 */
class Text_LanguageDetect_ISO639Test extends PHPUnit\Framework\TestCase
{
    /** A known language name maps to its two-letter code. */
    public function testNameToCode2()
    {
        $code = Text_LanguageDetect_ISO639::nameToCode2('german');
        $this->assertEquals('de', $code);
    }

    /** An unknown language name has no two-letter code. */
    public function testNameToCode2Fail()
    {
        $code = Text_LanguageDetect_ISO639::nameToCode2('doesnotexist');
        $this->assertNull($code);
    }

    /** A known language name maps to its three-letter code. */
    public function testNameToCode3()
    {
        $code = Text_LanguageDetect_ISO639::nameToCode3('french');
        $this->assertEquals('fra', $code);
    }

    /** An unknown language name has no three-letter code. */
    public function testNameToCode3Fail()
    {
        $code = Text_LanguageDetect_ISO639::nameToCode3('doesnotexist');
        $this->assertNull($code);
    }

    /** A known two-letter code maps back to the language name. */
    public function testCode2ToName()
    {
        $name = Text_LanguageDetect_ISO639::code2ToName('en');
        $this->assertEquals('english', $name);
    }

    /** An unknown two-letter code has no language name. */
    public function testCode2ToNameFail()
    {
        $name = Text_LanguageDetect_ISO639::code2ToName('nx');
        $this->assertNull($name);
    }

    /** A known three-letter code maps back to the language name. */
    public function testCode3ToName()
    {
        $name = Text_LanguageDetect_ISO639::code3ToName('rom');
        $this->assertEquals('romanian', $name);
    }

    /** An unknown three-letter code has no language name. */
    public function testCode3ToNameFail()
    {
        $name = Text_LanguageDetect_ISO639::code3ToName('nxx');
        $this->assertNull($name);
    }
}

?>
|
|
@ -5,5 +5,5 @@ set_include_path(
|
||||||
. '../library' . PATH_SEPARATOR
|
. '../library' . PATH_SEPARATOR
|
||||||
. '../'
|
. '../'
|
||||||
);
|
);
|
||||||
define('UNIT_TESTING', 1);
|
// Register the flag without a leading backslash: define() stores the name
// verbatim, whereas defined() strips a leading "\" before the lookup, so a
// constant registered as '\UNIT_TESTING' could never be found by the
// defined('\UNIT_TESTING') guards in the application code.
define('UNIT_TESTING', 1);
|
||||||
require_once('boot.php');
|
require_once('boot.php');
|
||||||
|
|
|
@ -6,7 +6,6 @@ use Code\Lib\Activity;
|
||||||
use Code\Lib\JcsEddsa2022;
|
use Code\Lib\JcsEddsa2022;
|
||||||
use Code\Tests\Unit\UnitTestCase;
|
use Code\Tests\Unit\UnitTestCase;
|
||||||
|
|
||||||
|
|
||||||
class JcsEddsa2022Test extends UnitTestCase
|
class JcsEddsa2022Test extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -361,6 +360,4 @@ ENJCGgOH8Bhpk+y1jtw1jpTig76wIvw+6zQtgNSfPnrNGIHt5mcoy4pFFXLv2lK2
|
||||||
$this->assertEquals('z6MkrD9t4uWqskmcVZVyzHVnRUVTnEM4fTCydm7oWMBXUHQH', $key, 'discover key as array');
|
$this->assertEquals('z6MkrD9t4uWqskmcVZVyzHVnRUVTnEM4fTCydm7oWMBXUHQH', $key, 'discover key as array');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue