Merge pull request #9365 from annando/better-plaintext

Improve plaintext generation for language detection
This commit is contained in:
Hypolite Petovan 2020-10-05 23:11:42 -04:00 committed by GitHub
commit 9954bf8adf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 1 deletion

View file

@@ -2482,7 +2482,17 @@ class Item
return '';
}
$naked_body = BBCode::toPlaintext($item['body'], false);
// Convert attachments to links
$naked_body = BBCode::removeAttachment($item['body']);
// Remove links and pictures
$naked_body = BBCode::removeLinks($naked_body);
// Convert the title and the body to plain text
$naked_body = trim($item['title'] . "\n" . BBCode::toPlaintext($naked_body));
// Remove possibly remaining links
$naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body);
$ld = new Language();
$languages = $ld->detect($naked_body)->limit(0, 3)->close();