diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 97ecdb977b..54af953544 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -78,7 +78,7 @@ class ParseUrl return []; } - $contenttype = $curlResult->getContentType(); + $contenttype = $curlResult->getContentType(); if (empty($contenttype)) { return ['application', 'octet-stream']; } @@ -109,16 +109,14 @@ class ParseUrl { if (empty($url)) { return [ - 'url' => '', + 'url' => '', 'type' => 'error', ]; } $urlHash = hash('sha256', $url); - $parsed_url = DBA::selectFirst('parsed_url', ['content'], - ['url_hash' => $urlHash, 'oembed' => false] - ); + $parsed_url = DBA::selectFirst('parsed_url', ['content'], ['url_hash' => $urlHash, 'oembed' => false]); if (!empty($parsed_url['content'])) { $data = unserialize($parsed_url['content']); return $data; @@ -187,7 +185,7 @@ class ParseUrl { if (empty($url)) { return [ - 'url' => '', + 'url' => '', 'type' => 'error', ]; } @@ -204,8 +202,8 @@ class ParseUrl $url = Network::stripTrackingQueryParams($url); $siteinfo = [ - 'url' => $url, - 'type' => 'link', + 'url' => $url, + 'type' => 'link', 'expires' => DateTimeFormat::utc(self::DEFAULT_EXPIRATION_FAILURE), ]; @@ -246,11 +244,13 @@ class ParseUrl if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')[0] ?? '') { if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) { $maxAge = max(86400, (int)array_pop($matches)); + $siteinfo['expires'] = DateTimeFormat::utc("now + $maxAge seconds"); } } $body = $curlResult->getBodyString(); + $siteinfo['size'] = mb_strlen($body); $charset = ''; @@ -260,7 +260,8 @@ class ParseUrl if (isset($mediaType->parameters['charset'])) { $charset = $mediaType->parameters['charset']; } - } catch(\InvalidArgumentException $e) {} + } catch(\InvalidArgumentException $e) { + } $siteinfo['charset'] = $charset; @@ -306,10 +307,9 @@ class ParseUrl } if (@$meta_tag['http-equiv'] == 'refresh') { - $path = $meta_tag['content']; - $pathinfo = explode(';', $path); + $path = $meta_tag['content']; $content = ''; - foreach ($pathinfo as $value) { + foreach (explode(';', $path) as $value) { if (substr(strtolower($value), 0, 4) == 'url=') { $content = substr($value, 4); } @@ -455,7 +455,8 @@ class ParseUrl $list = $xpath->query("//script[@type='application/ld+json']"); foreach ($list as $node) { if (!empty($node->nodeValue)) { - if ($jsonld = json_decode($node->nodeValue, true)) { + $jsonld = json_decode($node->nodeValue, true); + if (is_array($jsonld)) { $siteinfo = self::parseParts($siteinfo, $jsonld); } } @@ -488,6 +489,7 @@ class ParseUrl if (!empty($siteinfo['text']) && mb_strlen($siteinfo['text']) > self::MAX_DESC_COUNT) { $siteinfo['text'] = mb_substr($siteinfo['text'], 0, self::MAX_DESC_COUNT) . '…'; + $pos = mb_strrpos($siteinfo['text'], '.'); if ($pos > self::MIN_DESC_COUNT) { $siteinfo['text'] = mb_substr($siteinfo['text'], 0, $pos + 1); @@ -511,7 +513,7 @@ class ParseUrl * @param array $siteinfo * @return array */ - private static function checkMedia(string $page_url, array $siteinfo) : array + private static function checkMedia(string $page_url, array $siteinfo): array { if (!empty($siteinfo['images'])) { array_walk($siteinfo['images'], function (&$image) use ($page_url) { @@ -522,13 +524,14 @@ class ParseUrl */ if (!empty($image['url'])) { $image['url'] = self::completeUrl($image['url'], $page_url); + $photodata = Images::getInfoFromURLCached($image['url']); if (($photodata) && ($photodata[0] > 50) && ($photodata[1] > 50)) { - $image['src'] = $image['url']; - $image['width'] = $photodata[0]; - $image['height'] = $photodata[1]; + $image['src'] = $image['url']; + $image['width'] = $photodata[0]; + $image['height'] = $photodata[1]; $image['contenttype'] = $photodata['mime']; - $image['blurhash'] = $photodata['blurhash'] ?? null; + $image['blurhash'] = $photodata['blurhash'] ?? null; unset($image['url']); ksort($image); } else { @@ -545,13 +548,14 @@ class ParseUrl foreach (['audio', 'video'] as $element) { if (!empty($siteinfo[$element])) { array_walk($siteinfo[$element], function (&$media) use ($page_url, &$siteinfo) { - $url = ''; - $embed = ''; - $content = ''; + $url = ''; + $embed = ''; + $content = ''; $contenttype = ''; foreach (['embed', 'content', 'url'] as $field) { if (!empty($media[$field])) { $media[$field] = self::completeUrl($media[$field], $page_url); + $type = self::getContentType($media[$field]); if (($type[0] ?? '') == 'text') { if ($field == 'embed') { @@ -560,7 +564,7 @@ class ParseUrl $url = $media[$field]; } } elseif (!empty($type[0])) { - $content = $media[$field]; + $content = $media[$field]; $contenttype = implode('/', $type); } } @@ -707,7 +711,7 @@ class ParseUrl } elseif (!empty($jsonld['@type'])) { $siteinfo = self::parseJsonLd($siteinfo, $jsonld); } elseif (!empty($jsonld)) { - $keys = array_keys($jsonld); + $keys = array_keys($jsonld); $numeric_keys = true; foreach ($keys as $key) { if (!is_int($key)) { @@ -811,7 +815,7 @@ class ParseUrl case 'Person': case 'Patient': case 'PerformingGroup': - case 'DanceGroup'; + case 'DanceGroup': case 'MusicGroup': case 'TheaterGroup': return self::parseJsonLdWebPerson($siteinfo, $jsonld); @@ -954,8 +958,7 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'keywords'); if (!empty($content)) { $siteinfo['keywords'] = []; - $keywords = explode(',', $content); - foreach ($keywords as $keyword) { + foreach (explode(',', $content) as $keyword) { $siteinfo['keywords'][] = trim($keyword); } }