Merge pull request #14708 from annando/exception

Fix exception "Argument #2 ($jsonld) must be of type array, string given"
This commit is contained in:
Tobias Diekershoff 2025-01-19 17:36:54 +01:00 committed by GitHub
commit ee25c69cd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -78,7 +78,7 @@ class ParseUrl
return []; return [];
} }
$contenttype = $curlResult->getContentType(); $contenttype = $curlResult->getContentType();
if (empty($contenttype)) { if (empty($contenttype)) {
return ['application', 'octet-stream']; return ['application', 'octet-stream'];
} }
@ -109,16 +109,14 @@ class ParseUrl
{ {
if (empty($url)) { if (empty($url)) {
return [ return [
'url' => '', 'url' => '',
'type' => 'error', 'type' => 'error',
]; ];
} }
$urlHash = hash('sha256', $url); $urlHash = hash('sha256', $url);
$parsed_url = DBA::selectFirst('parsed_url', ['content'], $parsed_url = DBA::selectFirst('parsed_url', ['content'], ['url_hash' => $urlHash, 'oembed' => false]);
['url_hash' => $urlHash, 'oembed' => false]
);
if (!empty($parsed_url['content'])) { if (!empty($parsed_url['content'])) {
$data = unserialize($parsed_url['content']); $data = unserialize($parsed_url['content']);
return $data; return $data;
@ -187,7 +185,7 @@ class ParseUrl
{ {
if (empty($url)) { if (empty($url)) {
return [ return [
'url' => '', 'url' => '',
'type' => 'error', 'type' => 'error',
]; ];
} }
@ -204,8 +202,8 @@ class ParseUrl
$url = Network::stripTrackingQueryParams($url); $url = Network::stripTrackingQueryParams($url);
$siteinfo = [ $siteinfo = [
'url' => $url, 'url' => $url,
'type' => 'link', 'type' => 'link',
'expires' => DateTimeFormat::utc(self::DEFAULT_EXPIRATION_FAILURE), 'expires' => DateTimeFormat::utc(self::DEFAULT_EXPIRATION_FAILURE),
]; ];
@ -246,11 +244,13 @@ class ParseUrl
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')[0] ?? '') { if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')[0] ?? '') {
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) { if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
$maxAge = max(86400, (int)array_pop($matches)); $maxAge = max(86400, (int)array_pop($matches));
$siteinfo['expires'] = DateTimeFormat::utc("now + $maxAge seconds"); $siteinfo['expires'] = DateTimeFormat::utc("now + $maxAge seconds");
} }
} }
$body = $curlResult->getBodyString(); $body = $curlResult->getBodyString();
$siteinfo['size'] = mb_strlen($body); $siteinfo['size'] = mb_strlen($body);
$charset = ''; $charset = '';
@ -260,7 +260,8 @@ class ParseUrl
if (isset($mediaType->parameters['charset'])) { if (isset($mediaType->parameters['charset'])) {
$charset = $mediaType->parameters['charset']; $charset = $mediaType->parameters['charset'];
} }
} catch(\InvalidArgumentException $e) {} } catch(\InvalidArgumentException $e) {
}
$siteinfo['charset'] = $charset; $siteinfo['charset'] = $charset;
@ -306,10 +307,9 @@ class ParseUrl
} }
if (@$meta_tag['http-equiv'] == 'refresh') { if (@$meta_tag['http-equiv'] == 'refresh') {
$path = $meta_tag['content']; $path = $meta_tag['content'];
$pathinfo = explode(';', $path);
$content = ''; $content = '';
foreach ($pathinfo as $value) { foreach (explode(';', $path) as $value) {
if (substr(strtolower($value), 0, 4) == 'url=') { if (substr(strtolower($value), 0, 4) == 'url=') {
$content = substr($value, 4); $content = substr($value, 4);
} }
@ -455,7 +455,8 @@ class ParseUrl
$list = $xpath->query("//script[@type='application/ld+json']"); $list = $xpath->query("//script[@type='application/ld+json']");
foreach ($list as $node) { foreach ($list as $node) {
if (!empty($node->nodeValue)) { if (!empty($node->nodeValue)) {
if ($jsonld = json_decode($node->nodeValue, true)) { $jsonld = json_decode($node->nodeValue, true);
if (is_array($jsonld)) {
$siteinfo = self::parseParts($siteinfo, $jsonld); $siteinfo = self::parseParts($siteinfo, $jsonld);
} }
} }
@ -488,6 +489,7 @@ class ParseUrl
if (!empty($siteinfo['text']) && mb_strlen($siteinfo['text']) > self::MAX_DESC_COUNT) { if (!empty($siteinfo['text']) && mb_strlen($siteinfo['text']) > self::MAX_DESC_COUNT) {
$siteinfo['text'] = mb_substr($siteinfo['text'], 0, self::MAX_DESC_COUNT) . '…'; $siteinfo['text'] = mb_substr($siteinfo['text'], 0, self::MAX_DESC_COUNT) . '…';
$pos = mb_strrpos($siteinfo['text'], '.'); $pos = mb_strrpos($siteinfo['text'], '.');
if ($pos > self::MIN_DESC_COUNT) { if ($pos > self::MIN_DESC_COUNT) {
$siteinfo['text'] = mb_substr($siteinfo['text'], 0, $pos + 1); $siteinfo['text'] = mb_substr($siteinfo['text'], 0, $pos + 1);
@ -511,7 +513,7 @@ class ParseUrl
* @param array $siteinfo * @param array $siteinfo
* @return array * @return array
*/ */
private static function checkMedia(string $page_url, array $siteinfo) : array private static function checkMedia(string $page_url, array $siteinfo): array
{ {
if (!empty($siteinfo['images'])) { if (!empty($siteinfo['images'])) {
array_walk($siteinfo['images'], function (&$image) use ($page_url) { array_walk($siteinfo['images'], function (&$image) use ($page_url) {
@ -522,13 +524,14 @@ class ParseUrl
*/ */
if (!empty($image['url'])) { if (!empty($image['url'])) {
$image['url'] = self::completeUrl($image['url'], $page_url); $image['url'] = self::completeUrl($image['url'], $page_url);
$photodata = Images::getInfoFromURLCached($image['url']); $photodata = Images::getInfoFromURLCached($image['url']);
if (($photodata) && ($photodata[0] > 50) && ($photodata[1] > 50)) { if (($photodata) && ($photodata[0] > 50) && ($photodata[1] > 50)) {
$image['src'] = $image['url']; $image['src'] = $image['url'];
$image['width'] = $photodata[0]; $image['width'] = $photodata[0];
$image['height'] = $photodata[1]; $image['height'] = $photodata[1];
$image['contenttype'] = $photodata['mime']; $image['contenttype'] = $photodata['mime'];
$image['blurhash'] = $photodata['blurhash'] ?? null; $image['blurhash'] = $photodata['blurhash'] ?? null;
unset($image['url']); unset($image['url']);
ksort($image); ksort($image);
} else { } else {
@ -545,13 +548,14 @@ class ParseUrl
foreach (['audio', 'video'] as $element) { foreach (['audio', 'video'] as $element) {
if (!empty($siteinfo[$element])) { if (!empty($siteinfo[$element])) {
array_walk($siteinfo[$element], function (&$media) use ($page_url, &$siteinfo) { array_walk($siteinfo[$element], function (&$media) use ($page_url, &$siteinfo) {
$url = ''; $url = '';
$embed = ''; $embed = '';
$content = ''; $content = '';
$contenttype = ''; $contenttype = '';
foreach (['embed', 'content', 'url'] as $field) { foreach (['embed', 'content', 'url'] as $field) {
if (!empty($media[$field])) { if (!empty($media[$field])) {
$media[$field] = self::completeUrl($media[$field], $page_url); $media[$field] = self::completeUrl($media[$field], $page_url);
$type = self::getContentType($media[$field]); $type = self::getContentType($media[$field]);
if (($type[0] ?? '') == 'text') { if (($type[0] ?? '') == 'text') {
if ($field == 'embed') { if ($field == 'embed') {
@ -560,7 +564,7 @@ class ParseUrl
$url = $media[$field]; $url = $media[$field];
} }
} elseif (!empty($type[0])) { } elseif (!empty($type[0])) {
$content = $media[$field]; $content = $media[$field];
$contenttype = implode('/', $type); $contenttype = implode('/', $type);
} }
} }
@ -707,7 +711,7 @@ class ParseUrl
} elseif (!empty($jsonld['@type'])) { } elseif (!empty($jsonld['@type'])) {
$siteinfo = self::parseJsonLd($siteinfo, $jsonld); $siteinfo = self::parseJsonLd($siteinfo, $jsonld);
} elseif (!empty($jsonld)) { } elseif (!empty($jsonld)) {
$keys = array_keys($jsonld); $keys = array_keys($jsonld);
$numeric_keys = true; $numeric_keys = true;
foreach ($keys as $key) { foreach ($keys as $key) {
if (!is_int($key)) { if (!is_int($key)) {
@ -811,7 +815,7 @@ class ParseUrl
case 'Person': case 'Person':
case 'Patient': case 'Patient':
case 'PerformingGroup': case 'PerformingGroup':
case 'DanceGroup'; case 'DanceGroup':
case 'MusicGroup': case 'MusicGroup':
case 'TheaterGroup': case 'TheaterGroup':
return self::parseJsonLdWebPerson($siteinfo, $jsonld); return self::parseJsonLdWebPerson($siteinfo, $jsonld);
@ -954,8 +958,7 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'keywords'); $content = JsonLD::fetchElement($jsonld, 'keywords');
if (!empty($content)) { if (!empty($content)) {
$siteinfo['keywords'] = []; $siteinfo['keywords'] = [];
$keywords = explode(',', $content); foreach (explode(',', $content) as $keyword) {
foreach ($keywords as $keyword) {
$siteinfo['keywords'][] = trim($keyword); $siteinfo['keywords'][] = trim($keyword);
} }
} }