Improved summary handling for feeds

This commit is contained in:
Michael 2024-05-23 19:45:42 +00:00
parent d20c5493ae
commit 4066a5403c

View file

@ -374,10 +374,12 @@ class Feed
$guid = XML::getFirstNodeValue($xpath, 'guid/text()', $entry); $guid = XML::getFirstNodeValue($xpath, 'guid/text()', $entry);
if (!empty($guid)) { if (!empty($guid)) {
$item['uri'] = $guid; if (empty($item['uri'])) {
$item['uri'] = $guid;
}
// Don't use the GUID value directly but instead use it as a basis for the GUID // Don't use the GUID value directly but instead use it as a basis for the GUID
$item['guid'] = Item::guidFromUri($guid, parse_url($guid, PHP_URL_HOST) ?? parse_url($item['plink'], PHP_URL_HOST)); $item['guid'] = Item::guidFromUri($guid, parse_url($item['plink'], PHP_URL_HOST));
} }
if (empty($item['uri'])) { if (empty($item['uri'])) {
@ -410,7 +412,7 @@ class Feed
$item['title'] = XML::getFirstNodeValue($xpath, 'itunes:title/text()', $entry); $item['title'] = XML::getFirstNodeValue($xpath, 'itunes:title/text()', $entry);
} }
$item['title'] = html_entity_decode($item['title'], ENT_QUOTES, 'UTF-8'); $item['title'] = trim(html_entity_decode($item['title'], ENT_QUOTES, 'UTF-8'));
$published = XML::getFirstNodeValue($xpath, $atomns . ':published/text()', $entry); $published = XML::getFirstNodeValue($xpath, $atomns . ':published/text()', $entry);
@ -538,28 +540,14 @@ class Feed
$summary = ''; $summary = '';
} }
if ($body == $summary) {
$summary = '';
}
// remove the content of the title if it is identical to the body // remove the content of the title if it is identical to the body
// This helps with auto generated titles e.g. from tumblr // This helps with auto generated titles e.g. from tumblr
if (self::titleIsBody($item['title'], $body)) { if (self::titleIsBody($item['title'], $body)) {
$item['title'] = ''; $item['title'] = '';
} }
if (!HTML::isHTML($body)) { $item['body'] = self::formatBody($body, $basepath);
$html = BBCode::convert($body, false, BBCode::EXTERNAL); $summary = self::formatBody($summary, $basepath);
if ($body != $html) {
Logger::debug('Body contained no HTML', ['original' => $body, 'converted' => $html]);
$body = $html;
}
}
$item['body'] = HTML::toBBCode($body, $basepath);
// Remove tracking pixels
$item['body'] = preg_replace("/\[img=1x1\]([^\[\]]*)\[\/img\]/Usi", '', $item['body']);
if (($item['body'] == '') && ($item['title'] != '')) { if (($item['body'] == '') && ($item['title'] != '')) {
$item['body'] = $item['title']; $item['body'] = $item['title'];
@ -593,24 +581,18 @@ class Feed
$item['body'] = str_replace($item['plink'], '', $item['body']); $item['body'] = str_replace($item['plink'], '', $item['body']);
$item['body'] = trim(preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $item['body'])); $item['body'] = trim(preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $item['body']));
// Replace the content when the title is longer than the body $summary = str_replace($item['plink'], '', $summary);
$replace = (strlen($item['title']) > strlen($item['body'])); $summary = trim(preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $summary));
// Replace it, when there is an image in the body if (!empty($summary) && self::replaceBodyWithTitle($summary, $item['title'])) {
if (strstr($item['body'], '[/img]')) { $summary = '';
$replace = true;
}
// Replace it, when there is a link in the body
if (strstr($item['body'], '[/url]')) {
$replace = true;
} }
$saved_body = $item['body']; $saved_body = $item['body'];
$saved_title = $item['title']; $saved_title = $item['title'];
if ($replace) { if (self::replaceBodyWithTitle($item['body'], $item['title'])) {
$item['body'] = trim($item['title']); $item['body'] = $summary ?: $item['title'];
} }
$data = ParseUrl::getSiteinfoCached($item['plink']); $data = ParseUrl::getSiteinfoCached($item['plink']);
@ -677,10 +659,6 @@ class Feed
} }
} }
} else { } else {
if (!empty($summary)) {
$item['content-warning'] = HTML::toBBCode($summary, $basepath);
}
if ($fetch_further_information == LocalRelationship::FFI_KEYWORD) { if ($fetch_further_information == LocalRelationship::FFI_KEYWORD) {
if (empty($taglist)) { if (empty($taglist)) {
$taglist = PageInfo::getTagsFromUrl($item['plink'], $preview, $contact['ffi_keyword_denylist'] ?? ''); $taglist = PageInfo::getTagsFromUrl($item['plink'], $preview, $contact['ffi_keyword_denylist'] ?? '');
@ -1308,4 +1286,37 @@ class Feed
return substr($title, 0, $pos) . $trailer; return substr($title, 0, $pos) . $trailer;
} }
/**
 * Converts a feed text (body or summary) to BBCode and strips tracking pixels.
 *
 * When HTML::isHTML() does not recognise the text as HTML, it is first passed
 * through BBCode::convert(); the converted text replaces the input only if it
 * differs from it. The result is then handed to HTML::toBBCode(), and every
 * "[img=1x1]...[/img]" element is removed from the outcome.
 *
 * @param string $body     Text to convert
 * @param string $basepath Forwarded unchanged to HTML::toBBCode()
 *                         (presumably the base URL for relative links - TODO confirm)
 *
 * @return string The BBCode text without 1x1 tracking images
 */
private static function formatBody(string $body, string $basepath): string
{
	if (!HTML::isHTML($body)) {
		$html = BBCode::convert($body, false, BBCode::EXTERNAL);
		if ($body != $html) {
			Logger::debug('Body contained no HTML', ['original' => $body, 'converted' => $html]);
			$body = $html;
		}
	}

	$body = HTML::toBBCode($body, $basepath);

	// Remove tracking pixels.
	// preg_replace() yields null on a PCRE error, which would violate the
	// declared string return type; keep the unstripped text in that case.
	return preg_replace("/\[img=1x1\]([^\[\]]*)\[\/img\]/Usi", '', $body) ?? $body;
}
/**
 * Tells whether a body should be replaced, judged against the given title.
 *
 * @param string $body  BBCode body to inspect
 * @param string $title Title the body is compared with
 *
 * @return bool true when the body contains a closing image or link tag,
 *              or when the title has more bytes than the body
 */
private static function replaceBodyWithTitle(string $body, string $title): bool
{
	// A body that carries an image or a link is always replaced
	foreach (['[/img]', '[/url]'] as $closingTag) {
		if (strpos($body, $closingTag) !== false) {
			return true;
		}
	}

	// Otherwise replace only when the title is longer than the body
	return strlen($title) > strlen($body);
}
} }