Support for non-HTML content for feed imports

This commit is contained in:
Michael 2024-05-11 20:03:19 +00:00
parent ee9510e17d
commit bca86beda0
2 changed files with 22 additions and 3 deletions

View file

@ -253,8 +253,10 @@ class HTML
self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]');
self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]');
$elements = ['b', 'del', 'em', 'i', 'ins', 'kbd', 'mark', $elements = [
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var']; 'b', 'del', 'em', 'i', 'ins', 'kbd', 'mark',
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var'
];
foreach ($elements as $element) { foreach ($elements as $element) {
self::tagToBBCode($doc, $element, [], '[' . $element . ']', '[/' . $element . ']'); self::tagToBBCode($doc, $element, [], '[' . $element . ']', '[/' . $element . ']');
} }
@ -1059,4 +1061,15 @@ class HTML
return null; return null;
} }
/**
 * Check whether a text contains HTML markup or HTML entities
 *
 * The text is reported as HTML when decoding entities or stripping tags
 * alters it. A text that survives both operations unchanged is reported
 * as not being HTML.
 *
 * @param string $text The text to inspect
 * @return bool True if entity decoding or tag stripping changes the text
 */
public static function isHTML(string $text): bool
{
	// Both operands are always strings, so compare strictly and avoid
	// the numeric-string juggling that the loose "!=" operator performs.
	return ($text !== html_entity_decode($text)) || ($text !== strip_tags($text));
}
} }

View file

@ -29,7 +29,6 @@ use Friendica\Contact\LocalRelationship\Entity\LocalRelationship;
use Friendica\Content\PageInfo; use Friendica\Content\PageInfo;
use Friendica\Content\Text\BBCode; use Friendica\Content\Text\BBCode;
use Friendica\Content\Text\HTML; use Friendica\Content\Text\HTML;
use Friendica\Core\Cache\Enum\Duration;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\Core\Protocol; use Friendica\Core\Protocol;
use Friendica\Core\Worker; use Friendica\Core\Worker;
@ -546,6 +545,13 @@ class Feed
if (self::titleIsBody($item['title'], $body)) { if (self::titleIsBody($item['title'], $body)) {
$item['title'] = ''; $item['title'] = '';
} }
if (!HTML::isHTML($body)) {
$original = $body;
$body = BBCode::convert($body, false, BBCode::EXTERNAL);
Logger::debug('Body contained no HTML', ['original' => $original, 'converted' => $body]);
}
$item['body'] = HTML::toBBCode($body, $basepath); $item['body'] = HTML::toBBCode($body, $basepath);
// Remove tracking pixels // Remove tracking pixels