From 8485c8c3571cc68e02a18bc228b9fd6d2bbcf982 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 31 Oct 2020 13:26:08 +0000 Subject: [PATCH] The post-media table now works with the "attach" field as well --- mod/item.php | 8 +- src/Model/Item.php | 16 ++++ src/Model/Post/Media.php | 116 ++++++++++++++++++++++--- src/Protocol/ActivityPub/Processor.php | 3 +- src/Protocol/DFRN.php | 3 +- src/Protocol/Feed.php | 3 +- src/Protocol/OStatus.php | 4 +- 7 files changed, 131 insertions(+), 22 deletions(-) diff --git a/mod/item.php b/mod/item.php index 8744ba3b2e..9828d1acb2 100644 --- a/mod/item.php +++ b/mod/item.php @@ -35,7 +35,6 @@ use Friendica\Content\Text\BBCode; use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Protocol; -use Friendica\Core\Renderer; use Friendica\Core\Session; use Friendica\Core\System; use Friendica\Core\Worker; @@ -48,6 +47,7 @@ use Friendica\Model\FileTag; use Friendica\Model\Item; use Friendica\Model\Notify\Type; use Friendica\Model\Photo; +use Friendica\Model\Post; use Friendica\Model\Tag; use Friendica\Network\HTTPException; use Friendica\Object\EMail\ItemCCEMail; @@ -55,7 +55,6 @@ use Friendica\Protocol\Activity; use Friendica\Protocol\Diaspora; use Friendica\Util\DateTimeFormat; use Friendica\Security\Security; -use Friendica\Util\Strings; use Friendica\Worker\Delivery; function item_post(App $a) { @@ -532,9 +531,8 @@ function item_post(App $a) { if (strlen($attachments)) { $attachments .= ','; } - $attachments .= '[attach]href="' . DI::baseUrl() . '/attach/' . $attachment['id'] . - '" length="' . $attachment['filesize'] . '" type="' . $attachment['filetype'] . - '" title="' . ($attachment['filename'] ? $attachment['filename'] : '') . '"[/attach]'; + $attachments .= Post\Media::getAttachElement(DI::baseUrl() . '/attach/' . $attachment['id'], + $attachment['filesize'], $attachment['filetype'], $attachment['filename'] ?? 
''); } $body = str_replace($match[1],'',$body); } diff --git a/src/Model/Item.php b/src/Model/Item.php index 46d28ee822..3b1a58cc85 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -968,6 +968,14 @@ class Item while ($item = DBA::fetch($items)) { if (empty($content_fields['verb']) || !in_array($content_fields['verb'], self::ACTIVITIES)) { + if (!empty($content_fields['body'])) { + $content_fields['raw-body'] = trim($content_fields['raw-body'] ?? $content_fields['body']); + + // Remove all media attachments from the body and store them in the post-media table + $content_fields['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $content_fields['raw-body']); + $content_fields['raw-body'] = self::setHashtags($content_fields['raw-body']); + } + self::updateContent($content_fields, ['uri-id' => $item['uri-id']]); if (empty($item['icid'])) { @@ -994,6 +1002,10 @@ class Item } } + if (!empty($fields['attach'])) { + Post\Media::insertFromAttachment($item['uri-id'], $fields['attach']); + } + Post\DeliveryData::update($item['uri-id'], $delivery_data); self::updateThread($item['id']); @@ -1826,6 +1838,10 @@ class Item // Check for hashtags in the body and repair or add hashtag links $item['body'] = self::setHashtags($item['body']); + if (!empty($item['attach'])) { + Post\Media::insertFromAttachment($item['uri-id'], $item['attach']); + } + // Fill the cache field self::putInCache($item); diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php index ec3bf967b3..554cc6c4cc 100644 --- a/src/Model/Post/Media.php +++ b/src/Model/Post/Media.php @@ -24,6 +24,7 @@ namespace Friendica\Model\Post; use Friendica\Core\Logger; use Friendica\Core\System; use Friendica\Database\DBA; +use Friendica\DI; use Friendica\Util\Images; /** @@ -34,11 +35,12 @@ use Friendica\Util\Images; */ class Media { - const UNKNOWN = 0; - const IMAGE = 1; - const VIDEO = 2; - const AUDIO = 3; - const TORRENT = 16; + const UNKNOWN = 0; + const IMAGE = 1; + const VIDEO = 2; + const AUDIO = 
3; + const TORRENT = 16; + const DOCUMENT = 128; /** * Insert a post-media record @@ -46,25 +48,90 @@ class Media * @param array $media * @return void */ - public static function insert(array $media) + public static function insert(array $media, bool $force = false) { - if (empty($media['url']) || empty($media['uri-id'])) { + if (empty($media['url']) || empty($media['uri-id']) || empty($media['type'])) { + Logger::warning('Incomplete media data', ['media' => $media]); return; } - if (DBA::exists('post-media', ['uri-id' => $media['uri-id'], 'url' => $media['url']])) { + // "document" has got the lowest priority. So when the same file is both attached as document + // and embedded as picture then we only store the picture or replace the document + $found = DBA::selectFirst('post-media', ['type'], ['uri-id' => $media['uri-id'], 'url' => $media['url']]); + if (!$force && !empty($found) && (($found['type'] != self::DOCUMENT) || ($media['type'] == self::DOCUMENT))) { Logger::info('Media already exists', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]); return; } - $fields = ['type', 'mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']; + $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']; foreach ($fields as $field) { if (empty($media[$field])) { unset($media[$field]); } } - if ($media['type'] == self::IMAGE) { + // We are storing as fast as possible to avoid duplicated network requests + // when fetching additional information for pictures and other content. 
+ $result = DBA::insert('post-media', $media, true); + Logger::info('Stored media', ['result' => $result, 'media' => $media, 'callstack' => System::callstack()]); + $stored = $media; + + $media = self::fetchAdditionalData($media); + + if (array_diff_assoc($media, $stored)) { + $result = DBA::insert('post-media', $media, true); + Logger::info('Updated media', ['result' => $result, 'media' => $media]); + } else { + Logger::info('Nothing to update', ['media' => $media]); + } + } + + /** + * Creates the "[attach]" element from the given attributes + * + * @param string $href Value of the "href" attribute (used as the media "url") + * @param integer $length Value of the "length" attribute (used as the media "size") + * @param string $type Value of the "type" attribute (used as the media "mimetype") + * @param string $title Optional value of the "title" attribute (used as the media "description") + * @return string "[attach]" element + */ + public static function getAttachElement(string $href, int $length, string $type, string $title = '') + { + $media = self::fetchAdditionalData(['type' => self::DOCUMENT, 'url' => $href, + 'size' => $length, 'mimetype' => $type, 'description' => $title]); + + return '[attach]href="' . $media['url'] . '" length="' . $media['size'] . + '" type="' . $media['mimetype'] . '" title="' . $media['description'] . '"[/attach]'; + } + + /** + * Fetch additional data for the provided media array + * + * @param array $media + * @return array media array with additional data + */ + public static function fetchAdditionalData(array $media) + { + // Fetch the mimetype or size if missing. + // We don't do it for torrent links since they need special treatment. + // We don't do this for images, since we are fetching their details some lines later anyway. 
+ if (!in_array($media['type'], [self::TORRENT, self::IMAGE]) && (empty($media['mimetype']) || empty($media['size']))) { + $timeout = DI::config()->get('system', 'xrd_timeout'); + $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]); + if ($curlResult->isSuccess()) { + $header = $curlResult->getHeaderArray(); + if (empty($media['mimetype']) && !empty($header['content-type'])) { + $media['mimetype'] = $header['content-type']; + } + if (empty($media['size']) && !empty($header['content-length'])) { + $media['size'] = $header['content-length']; + } + } + } + + $filetype = !empty($media['mimetype']) ? strtolower(substr($media['mimetype'], 0, strpos($media['mimetype'], '/'))) : ''; + + if (($media['type'] == self::IMAGE) || ($filetype == 'image')) { $imagedata = Images::getInfoFromURLCached($media['url']); if (!empty($imagedata)) { $media['mimetype'] = $imagedata['mime']; @@ -80,9 +147,7 @@ class Media } } } - - $result = DBA::insert('post-media', $media, true); - Logger::info('Stored media', ['result' => $result, 'media' => $media, 'callstack' => System::callstack()]); + return $media; } /** @@ -168,4 +233,29 @@ class Media return trim($body); } + + /** + * Add media links from the attach field + * + * @param integer $uriid + * @param string $attach + * @return void + */ + public static function insertFromAttachment(int $uriid, string $attach) + { + if (!preg_match_all('|\[attach\]href=\"(.*?)\" length=\"(.*?)\" type=\"(.*?)\"(?: title=\"(.*?)\")?|', $attach, $matches, PREG_SET_ORDER)) { + return; + } + + foreach ($matches as $attachment) { + $media['type'] = self::DOCUMENT; + $media['uri-id'] = $uriid; + $media['url'] = $attachment[1]; + $media['size'] = $attachment[2]; + $media['mimetype'] = $attachment[3]; + $media['description'] = $attachment[4] ?? 
''; + + self::insert($media); + } + } } diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 63732b0e15..4fa8a4e9f9 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -196,7 +196,8 @@ class Processor $item['attach'] = ''; } - $item['attach'] .= '[attach]href="' . $attach['url'] . '" length="' . ($attach['length'] ?? '0') . '" type="' . $attach['mediaType'] . '" title="' . ($attach['name'] ?? '') . '"[/attach]'; + $item['attach'] .= Post\Media::getAttachElement($attach['url'], + $attach['length'] ?? 0, $attach['mediaType'], $attach['name'] ?? ''); } } } diff --git a/src/Protocol/DFRN.php b/src/Protocol/DFRN.php index d20864cf7f..2186081022 100644 --- a/src/Protocol/DFRN.php +++ b/src/Protocol/DFRN.php @@ -39,6 +39,7 @@ use Friendica\Model\ItemURI; use Friendica\Model\Mail; use Friendica\Model\Notify\Type; use Friendica\Model\PermissionSet; +use Friendica\Model\Post; use Friendica\Model\Post\Category; use Friendica\Model\Profile; use Friendica\Model\Tag; @@ -2176,7 +2177,7 @@ class DFRN $item["attach"] = ""; } - $item["attach"] .= '[attach]href="' . $href . '" length="' . $length . '" type="' . $type . '" title="' . $title . '"[/attach]'; + $item["attach"] .= Post\Media::getAttachElement($href, $length, $type, $title); break; } } diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index 67baf4b2ae..4eb638be39 100644 --- a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -33,6 +33,7 @@ use Friendica\Database\DBA; use Friendica\DI; use Friendica\Model\Contact; use Friendica\Model\Item; +use Friendica\Model\Post; use Friendica\Model\Tag; use Friendica\Model\User; use Friendica\Util\DateTimeFormat; @@ -457,7 +458,7 @@ class Feed $attachments[] = ["link" => $href, "type" => $type, "length" => $length]; - $item["attach"] .= '[attach]href="' . $href . '" length="' . $length . '" type="' . $type . 
'"[/attach]'; + $item["attach"] .= Post\Media::getAttachElement($href, $length, $type); } $taglist = []; diff --git a/src/Protocol/OStatus.php b/src/Protocol/OStatus.php index 4b67d8ecb2..5c157c9805 100644 --- a/src/Protocol/OStatus.php +++ b/src/Protocol/OStatus.php @@ -36,6 +36,7 @@ use Friendica\Model\Contact; use Friendica\Model\Conversation; use Friendica\Model\Item; use Friendica\Model\ItemURI; +use Friendica\Model\Post; use Friendica\Model\Tag; use Friendica\Model\User; use Friendica\Network\Probe; @@ -1126,7 +1127,8 @@ class OStatus if (!isset($attribute['length'])) { $attribute['length'] = "0"; } - $item["attach"] .= '[attach]href="'.$attribute['href'].'" length="'.$attribute['length'].'" type="'.$attribute['type'].'" title="'.($attribute['title'] ?? '') .'"[/attach]'; + $item["attach"] .= Post\Media::getAttachElement($attribute['href'], + $attribute['length'], $attribute['type'], $attribute['title'] ?? ''); } break; case "related":