[twitter] Rework twitter_expand_entities
- Uses Twitter-provided entity indices to avoid blanket string replacement and unwarranted tag search
This commit is contained in:
parent
96925a3a75
commit
1df61e8dc2
1 changed files with 117 additions and 139 deletions
|
@ -1202,163 +1202,141 @@ function twitter_fetchuser(App $a, $uid, $screen_name = "", $user_id = "")
|
||||||
return $contact_id;
|
return $contact_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
function twitter_expand_entities(App $a, $body, $item, $picture)
|
/**
|
||||||
|
* Replaces Twitter entities with Friendica-friendly links.
|
||||||
|
*
|
||||||
|
* The Twitter API gives indices for each entity, which allows for fine-grained replacement.
|
||||||
|
*
|
||||||
|
* First, we need to collect everything that needs to be replaced, what we will replace it with, and the start index.
|
||||||
|
* Then we sort the indices decreasingly, and we replace from the end of the body to the start in order for the next
|
||||||
|
* index to be correct even after the last replacement.
|
||||||
|
*
|
||||||
|
* @param string $body
|
||||||
|
* @param stdClass $status
|
||||||
|
* @param string $picture
|
||||||
|
* @return array
|
||||||
|
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||||
|
*/
|
||||||
|
function twitter_expand_entities($body, stdClass $status, $picture)
|
||||||
{
|
{
|
||||||
$plain = $body;
|
$plain = $body;
|
||||||
|
|
||||||
$tags_arr = [];
|
$tags = [];
|
||||||
|
|
||||||
foreach ($item->entities->hashtags AS $hashtag) {
|
$replacementList = [];
|
||||||
$url = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $hashtag->text . ']' . $hashtag->text . '[/url]';
|
|
||||||
$tags_arr['#' . $hashtag->text] = $url;
|
foreach ($status->entities->hashtags AS $hashtag) {
|
||||||
$body = str_replace('#' . $hashtag->text, $url, $body);
|
$replace = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $hashtag->text . ']' . $hashtag->text . '[/url]';
|
||||||
|
$tags['#' . $hashtag->text] = $replace;
|
||||||
|
|
||||||
|
$replacementList[$hashtag->indices[0]] = [
|
||||||
|
'replace' => $replace,
|
||||||
|
'length' => $hashtag->indices[1] - $hashtag->indices[0],
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($item->entities->user_mentions AS $mention) {
|
foreach ($status->entities->user_mentions AS $mention) {
|
||||||
$url = '@[url=https://twitter.com/' . rawurlencode($mention->screen_name) . ']' . $mention->screen_name . '[/url]';
|
$replace = '@[url=https://twitter.com/' . rawurlencode($mention->screen_name) . ']' . $mention->screen_name . '[/url]';
|
||||||
$tags_arr['@' . $mention->screen_name] = $url;
|
$tags['@' . $mention->screen_name] = $replace;
|
||||||
$body = str_replace('@' . $mention->screen_name, $url, $body);
|
|
||||||
|
$replacementList[$mention->indices[0]] = [
|
||||||
|
'replace' => $replace,
|
||||||
|
'length' => $mention->indices[1] - $mention->indices[0],
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isset($item->entities->urls)) {
|
// This URL if set will be used to add an attachment at the bottom of the post
|
||||||
$type = '';
|
$attachmentUrl = '';
|
||||||
$footerurl = '';
|
|
||||||
$footerlink = '';
|
|
||||||
$footer = '';
|
|
||||||
|
|
||||||
foreach ($item->entities->urls as $url) {
|
foreach ($status->entities->urls ?? [] as $url) {
|
||||||
$plain = str_replace($url->url, '', $plain);
|
$plain = str_replace($url->url, '', $plain);
|
||||||
|
|
||||||
if ($url->url && $url->expanded_url && $url->display_url) {
|
if ($url->url && $url->expanded_url && $url->display_url) {
|
||||||
// Quote tweet, we just remove the quoted tweet URL from the body, the share block will be added later.
|
|
||||||
if (!empty($item->quoted_status) && isset($item->quoted_status_id_str)
|
|
||||||
&& substr($url->expanded_url, -strlen($item->quoted_status_id_str)) == $item->quoted_status_id_str ) {
|
|
||||||
$body = str_replace($url->url, '', $body);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$expanded_url = $url->expanded_url;
|
// Quote tweet, we just remove the quoted tweet URL from the body, the share block will be added later.
|
||||||
|
if (!empty($status->quoted_status) && isset($status->quoted_status_id_str)
|
||||||
$final_url = Network::finalUrl($url->expanded_url);
|
&& substr($url->expanded_url, -strlen($status->quoted_status_id_str)) == $status->quoted_status_id_str
|
||||||
|
) {
|
||||||
$oembed_data = OEmbed::fetchURL($final_url);
|
$replacementList[$url->indices[0]] = [
|
||||||
|
'replace' => '',
|
||||||
if (empty($oembed_data) || empty($oembed_data->type)) {
|
'length' => $url->indices[1] - $url->indices[0],
|
||||||
continue;
|
];
|
||||||
}
|
|
||||||
|
|
||||||
// Quickfix: Workaround for URL with '[' and ']' in it
|
|
||||||
if (strpos($expanded_url, '[') || strpos($expanded_url, ']')) {
|
|
||||||
$expanded_url = $url->url;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($type == '') {
|
|
||||||
$type = $oembed_data->type;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($oembed_data->type == 'video') {
|
|
||||||
$type = $oembed_data->type;
|
|
||||||
$footerurl = $expanded_url;
|
|
||||||
$footerlink = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
|
|
||||||
|
|
||||||
$body = str_replace($url->url, $footerlink, $body);
|
|
||||||
} elseif (($oembed_data->type == 'photo') && isset($oembed_data->url)) {
|
|
||||||
$body = str_replace($url->url, '[url=' . $expanded_url . '][img]' . $oembed_data->url . '[/img][/url]', $body);
|
|
||||||
} elseif ($oembed_data->type != 'link') {
|
|
||||||
$body = str_replace($url->url, '[url=' . $expanded_url . ']' . $url->display_url . '[/url]', $body);
|
|
||||||
} else {
|
|
||||||
$img_str = Network::fetchUrl($final_url, true, 4);
|
|
||||||
|
|
||||||
$tempfile = tempnam(get_temppath(), 'cache');
|
|
||||||
file_put_contents($tempfile, $img_str);
|
|
||||||
|
|
||||||
// See http://php.net/manual/en/function.exif-imagetype.php#79283
|
|
||||||
if (filesize($tempfile) > 11) {
|
|
||||||
$mime = image_type_to_mime_type(exif_imagetype($tempfile));
|
|
||||||
} else {
|
|
||||||
$mime = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unlink($tempfile);
|
|
||||||
|
|
||||||
if (substr($mime, 0, 6) == 'image/') {
|
|
||||||
$type = 'photo';
|
|
||||||
$body = str_replace($url->url, '[img]' . $final_url . '[/img]', $body);
|
|
||||||
} else {
|
|
||||||
$type = $oembed_data->type;
|
|
||||||
$footerurl = $expanded_url;
|
|
||||||
$footerlink = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
|
|
||||||
|
|
||||||
$body = str_replace($url->url, $footerlink, $body);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Footer will be taken care of with a share block in the case of a quote
|
|
||||||
if (empty($item->quoted_status)) {
|
|
||||||
if ($footerurl != '') {
|
|
||||||
$footer = add_page_info($footerurl, false, $picture);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (($footerlink != '') && (trim($footer) != '')) {
|
|
||||||
$removedlink = trim(str_replace($footerlink, '', $body));
|
|
||||||
|
|
||||||
if (($removedlink == '') || strstr($body, $removedlink)) {
|
|
||||||
$body = $removedlink;
|
|
||||||
}
|
|
||||||
|
|
||||||
$body .= $footer;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($footer == '' && $picture != '') {
|
|
||||||
$body .= "\n\n[img]" . $picture . "[/img]\n";
|
|
||||||
} elseif ($footer == '' && $picture == '') {
|
|
||||||
$body = add_page_info_to_body($body);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// it seems as if the entities aren't always covering all mentions. So the rest will be checked here
|
|
||||||
$tags = BBCode::getTags($body);
|
|
||||||
|
|
||||||
if (count($tags)) {
|
|
||||||
foreach ($tags as $tag) {
|
|
||||||
if (strstr(trim($tag), ' ')) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strpos($tag, '#') === 0) {
|
$expanded_url = $url->expanded_url;
|
||||||
if (strpos($tag, '[url=')) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't link tags that are already embedded in links
|
$final_url = Network::finalUrl($url->expanded_url);
|
||||||
if (preg_match('/\[(.*?)' . preg_quote($tag, '/') . '(.*?)\]/', $body)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (preg_match('/\[(.*?)\]\((.*?)' . preg_quote($tag, '/') . '(.*?)\)/', $body)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$basetag = str_replace('_', ' ', substr($tag, 1));
|
$oembed_data = OEmbed::fetchURL($final_url);
|
||||||
$url = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]';
|
|
||||||
$body = str_replace($tag, $url, $body);
|
|
||||||
$tags_arr['#' . $basetag] = $url;
|
|
||||||
} elseif (strpos($tag, '@') === 0) {
|
|
||||||
if (strpos($tag, '[url=')) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$basetag = substr($tag, 1);
|
if (empty($oembed_data) || empty($oembed_data->type)) {
|
||||||
$url = '@[url=https://twitter.com/' . rawurlencode($basetag) . ']' . $basetag . '[/url]';
|
continue;
|
||||||
$body = str_replace($tag, $url, $body);
|
|
||||||
$tags_arr['@' . $basetag] = $url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Quickfix: Workaround for URL with '[' and ']' in it
|
||||||
|
if (strpos($expanded_url, '[') || strpos($expanded_url, ']')) {
|
||||||
|
$expanded_url = $url->url;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($oembed_data->type == 'video') {
|
||||||
|
$attachmentUrl = $expanded_url;
|
||||||
|
$replace = '';
|
||||||
|
} elseif (($oembed_data->type == 'photo') && isset($oembed_data->url)) {
|
||||||
|
$replace = '[url=' . $expanded_url . '][img]' . $oembed_data->url . '[/img][/url]';
|
||||||
|
} elseif ($oembed_data->type != 'link') {
|
||||||
|
$replace = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
|
||||||
|
} else {
|
||||||
|
$img_str = Network::fetchUrl($final_url, true, 4);
|
||||||
|
|
||||||
|
$tempfile = tempnam(get_temppath(), 'cache');
|
||||||
|
file_put_contents($tempfile, $img_str);
|
||||||
|
|
||||||
|
// See http://php.net/manual/en/function.exif-imagetype.php#79283
|
||||||
|
if (filesize($tempfile) > 11) {
|
||||||
|
$mime = image_type_to_mime_type(exif_imagetype($tempfile));
|
||||||
|
} else {
|
||||||
|
$mime = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unlink($tempfile);
|
||||||
|
|
||||||
|
if (substr($mime, 0, 6) == 'image/') {
|
||||||
|
$replace = '[img]' . $final_url . '[/img]';
|
||||||
|
} else {
|
||||||
|
$attachmentUrl = $expanded_url;
|
||||||
|
$replace = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$replacementList[$url->indices[0]] = [
|
||||||
|
'replace' => $replace,
|
||||||
|
'length' => $url->indices[1] - $url->indices[0],
|
||||||
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$tags = implode($tags_arr, ',');
|
krsort($replacementList);
|
||||||
|
|
||||||
|
foreach ($replacementList as $startIndex => $parameters) {
|
||||||
|
$body = Strings::substringReplace($body, $parameters['replace'], $startIndex, $parameters['length']);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Footer will be taken care of with a share block in the case of a quote
|
||||||
|
if (empty($status->quoted_status)) {
|
||||||
|
$footer = '';
|
||||||
|
if ($attachmentUrl) {
|
||||||
|
$footer = add_page_info($attachmentUrl, false, $picture);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trim($footer)) {
|
||||||
|
$body .= $footer;
|
||||||
|
} elseif ($picture) {
|
||||||
|
$body .= "\n\n[img]" . $picture . "[/img]\n";
|
||||||
|
} else {
|
||||||
|
$body = add_page_info_to_body($body);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ['body' => $body, 'tags' => $tags, 'plain' => $plain];
|
return ['body' => $body, 'tags' => $tags, 'plain' => $plain];
|
||||||
}
|
}
|
||||||
|
@ -1554,9 +1532,9 @@ function twitter_createpost(App $a, $uid, $post, array $self, $create_user, $onl
|
||||||
// Search for media links
|
// Search for media links
|
||||||
$picture = twitter_media_entities($post, $postarray);
|
$picture = twitter_media_entities($post, $postarray);
|
||||||
|
|
||||||
$converted = twitter_expand_entities($a, $postarray['body'], $post, $picture);
|
$converted = twitter_expand_entities($postarray['body'], $post, $picture);
|
||||||
$postarray['body'] = $converted["body"];
|
$postarray['body'] = $converted['body'];
|
||||||
$postarray['tag'] = $converted["tags"];
|
$postarray['tag'] = implode($converted['tags'], ',');
|
||||||
$postarray['created'] = DateTimeFormat::utc($post->created_at);
|
$postarray['created'] = DateTimeFormat::utc($post->created_at);
|
||||||
$postarray['edited'] = DateTimeFormat::utc($post->created_at);
|
$postarray['edited'] = DateTimeFormat::utc($post->created_at);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue