streams/Code/Lib/Oembed.php
2023-10-28 10:52:09 +11:00

487 lines
16 KiB
PHP

<?php
namespace Code\Lib;
use App;
use DOMDocument;
use DOMXPath;
use Code\Lib\Cache;
use Code\Lib\System;
use Code\Extend\Hook;
use Code\Render\Theme;
use Code\Lib\Url;
class Oembed
{
/**
 * preg_replace_callback() handler for [embed]url[/embed] tags.
 *
 * Asks action() what to do with the captured URL. A blocked URL is
 * rendered as a plain link; anything else is fetched with fetch_url()
 * and rendered with format_object().
 *
 * @param array $matches regex matches; index 1 holds the embed URL
 * @return string HTML replacing the whole [embed] tag
 */
public static function replacecb($matches)
{
    $decision = self::action($matches[1]);

    if ($decision['action'] !== 'block') {
        $oembed = self::fetch_url($decision['url']);
        return self::format_object($oembed);
    }

    // Blocked: no remote lookup, just show the URL as an ordinary link.
    return '<a href="' . $decision['url'] . '">' . $decision['url'] . '</a>';
}
/**
 * Decide how an embed URL is to be treated.
 *
 * The result starts as 'filter' and is adjusted in this order:
 *  - plain http URLs are blocked when system.embed_sslonly is set,
 *  - URLs containing '.well-known' are blocked,
 *  - URLs matching the site deny list (system.embed_deny) are blocked,
 *  - URLs matching the site allow list (system.embed_allow) become 'allow',
 *    unless they are already blocked,
 *  - URLs matching the local channel's own deny list are blocked.
 * Finally the 'oembed_action' hook is called with the result.
 *
 * @param string $embedurl the URL as found in the post ('&amp;' is decoded to '&')
 * @return array ['url' => string, 'action' => string] as left by the hook;
 *               this method itself sets action to 'block', 'allow' or 'filter'
 */
public static function action($embedurl)
{
    $action = 'filter';
    $embedurl = trim(str_replace('&amp;', '&', $embedurl));

    // URL schemes are case-insensitive, so "HTTP://..." must not slip past
    // the ssl-only policy.
    if (stripos($embedurl, 'http://') === 0 && intval(get_config('system', 'embed_sslonly'))) {
        $action = 'block';
    }

    if (strpos($embedurl, '.well-known') !== false) {
        $action = 'block';
    }

    // site allow/deny list
    if (self::list_matches(get_config('system', 'embed_deny'), $embedurl)) {
        $action = 'block';
    }

    // The allow list can only promote 'filter' to 'allow'; it never
    // overrides a block.
    if ($action !== 'block' && self::list_matches(get_config('system', 'embed_allow'), $embedurl)) {
        $action = 'allow';
    }

    // allow individual members to block something that wasn't blocked already.
    // They cannot over-ride the site to allow or change the filtering on an
    // embed that is not allowed by the site admin.
    $channel_id = local_channel();
    if ($channel_id && self::list_matches(get_pconfig($channel_id, 'system', 'embed_deny'), $embedurl)) {
        $action = 'block';
    }

    $arr = ['url' => $embedurl, 'action' => $action];
    Hook::call('oembed_action', $arr);

    return $arr;
}

/**
 * Check whether a URL contains any entry of an allow/deny list.
 *
 * @param mixed $list an array of entries, a newline separated string, or an empty value
 * @param string $url the URL to test
 * @return bool true when a non-empty (trimmed) entry occurs anywhere in $url
 */
private static function list_matches($list, $url)
{
    if (! $list) {
        return false;
    }
    if (! is_array($list)) {
        $list = explode("\n", $list);
    }
    foreach ($list as $entry) {
        $needle = trim($entry);
        if ($needle && strpos($url, $needle) !== false) {
            return true;
        }
    }
    return false;
}
/**
 * If the url is embeddable with oembed, return the bbcode link.
 *
 * @param string $url
 * @return string|false '[embed]url[/embed]' when fetch_url() yields a usable
 *                      result, false otherwise
 */
public static function process($url)
{
    $j = self::fetch_url($url);
    logger('oembed_process: ' . print_r($j, true), LOGGER_DATA, LOG_DEBUG);

    // fetch_url() always returns a non-empty array (it adds 'embedurl' and
    // 'zrl'), so the result is only usable when it carries a 'type' other
    // than 'error'. A response with no type at all is treated as an error.
    $type = ((is_array($j) && isset($j['type'])) ? $j['type'] : 'error');
    if ($type !== 'error') {
        return '[embed]' . $url . '[/embed]';
    }
    return false;
}
/**
 * Obtain the oEmbed data for a URL.
 *
 * The URL is first classified with action(); URLs containing a known
 * audio/video extension are additionally treated as blocked. Unless blocked,
 * the raw oEmbed JSON is taken from the cache or discovered from the page
 * itself; if that yields nothing the 'oembed_probe' hook may supply it.
 * When the action is 'filter', any 'html' member is purified and the result
 * is marked as an error if it contained a script or lost more than half of
 * its (whitespace-stripped) length.
 *
 * @param string $embedurl
 * @return array decoded oEmbed data (possibly only ['type' => 'error']) with
 *               'embedurl' and 'zrl' added
 */
public static function fetch_url($embedurl)
{
    $noexts = [ '.mp3', '.mp4', '.ogg', '.ogv', '.oga', '.ogm', '.webm', '.opus', '.m4a', '.mov' ];

    $result = self::action($embedurl);
    $embedurl = $result['url'];
    $action = $result['action'];

    $lower_url = strtolower($embedurl);
    foreach ($noexts as $ext) {
        if (strpos($lower_url, $ext) !== false) {
            $action = 'block';
            break;
        }
    }

    $j = null;
    $txt = null;

    // we should try to cache this and avoid a lookup on each render
    $is_matrix = isOWAEnabled($embedurl);
    $zrl = ((get_config('system', 'oembed_zrl')) ? $is_matrix : false);
    $use_session = ((local_channel() && $zrl) ? true : false);
    $furl = (($use_session) ? zid($embedurl) : $embedurl);

    $cache_key = '[' . App::$videowidth . '] ' . $furl;
    $cache_enabled = (! get_config('system', 'oembed_cache_disable'));

    if ($action !== 'block' && $cache_enabled) {
        $txt = Cache::get($cache_key);
    }

    // A blocked URL must stay blocked even if it points at a PDF.
    if ($action !== 'block' && strpos($lower_url, '.pdf') !== false && get_config('system', 'inline_pdf')) {
        // 'allow' skips the purifier below, so the URL has to be escaped here
        // before it is placed inside the attribute.
        $action = 'allow';
        $j = [
            'html' => '<object data="' . htmlspecialchars($embedurl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false)
                . '" type="application/pdf" style="width: 100%; height: 300px;"></object>',
            'title' => t('View PDF'),
            'type' => 'pdf'
        ];
        // set $txt to something so that we don't attempt to fetch what could be a lengthy pdf.
        $txt = EMPTY_STR;
    }

    if (is_null($txt)) {
        $txt = '';

        if ($action !== 'block') {
            // try oembed autodiscovery
            $txt = self::discover_json($furl, $use_session);
        }

        if (! $txt) {
            $x = ['url' => $embedurl, 'videowidth' => App::$videowidth];
            Hook::call('oembed_probe', $x);
            if (array_key_exists('embed', $x)) {
                $txt = $x['embed'];
            }
        }

        $txt = trim((string) $txt);
        // substr() is safe on an empty string, unlike $txt[0].
        if (substr($txt, 0, 1) !== '{') {
            $txt = '{"type":"error"}';
        }

        // save in cache - but never the error produced for a blocked URL:
        // a block can come from one member's personal deny list and the
        // cache entry is not per member.
        if ($cache_enabled && $action !== 'block') {
            Cache::set($cache_key, $txt);
        }
    }

    if (! $j) {
        $j = json_decode((string) $txt, true);
    }
    if (! is_array($j)) {
        $j = [];
    }

    if ($action === 'filter' && ! empty($j['html'])) {
        if (! is_string($j['html'])) {
            // Not markup at all; nothing sensible can be rendered from it.
            $j['html'] = '';
            $j['type'] = 'error';
        } else {
            $orig = $j['html'];
            $allow_position = (($is_matrix) ? true : false);

            // some sites (e.g. Mastodon) wrap their entire embed in an iframe
            // which we will purify away and which we provide anyway.
            // So if we see this, grab the frame src url and use that
            // as the embed content - which will still need to be purified.
            if (preg_match('#\<iframe(.*?)src\=[\'\"](.*?)[\'\"]#', $j['html'], $matches)) {
                $x = Url::get($matches[2]);
                // A failed fetch leaves nothing to embed; the empty result is
                // flagged as an error by the length check below.
                $orig = $j['html'] = (($x['success']) ? (string) $x['body'] : '');
            }

            $j['html'] = purify_html($j['html'], (($allow_position) ? [ 'allow_position' ] : []));

            $orig_len = mb_strlen(preg_replace('/\s+/', '', $orig));
            $new_len = mb_strlen(preg_replace('/\s+/', '', $j['html']));

            // Compare against false: a script tag at offset 0 must count too.
            if (stripos($orig, '<script') !== false || (! $new_len)) {
                $j['type'] = 'error';
            } elseif ($orig_len) {
                $ratio = $new_len / $orig_len;
                if ($ratio < 0.5) {
                    $j['type'] = 'error';
                    logger('oembed html truncated: ' . $ratio, LOGGER_DEBUG, LOG_INFO);
                }
            }
        }
    }

    $j['embedurl'] = $embedurl;
    $j['zrl'] = $is_matrix;

    return $j;
}

/**
 * Fetch a page and follow its oEmbed JSON discovery link.
 *
 * Looks for <link type="application/json+oembed"> first and falls back to
 * <link type="text/json+oembed"> (used by soundcloud, others may be also).
 * Only the first link of each type is tried.
 *
 * @param string $furl the page to fetch
 * @param bool $use_session passed to Url::get() as the 'session' option
 * @return string the body of the oEmbed response, or '' when none was obtained
 */
private static function discover_json($furl, $use_session)
{
    $ident = System::get_project_name();

    $result = Url::get(
        $furl,
        [
            'timeout' => 30,
            'accept_content' => "text/*",
            'novalidate' => true,
            'session' => $use_session,
            'useragent' => "Mozilla/5.0 (compatible; $ident)"
        ]
    );

    if (! $result['success']) {
        logger('fetch failure: ' . $furl);
        return '';
    }

    $html_text = $result['body'];
    if (! $html_text) {
        return '';
    }

    $dom = new DOMDocument();
    if (! @$dom->loadHTML($html_text)) {
        return '';
    }
    $xpath = new DOMXPath($dom);

    $link_types = [
        'application/json+oembed' => 'fetch failed: ',
        'text/json+oembed' => 'json fetch failed: ',
    ];

    foreach ($link_types as $type => $log_prefix) {
        $link = $xpath->query("//link[@type='$type']")->item(0);
        if ($link === null) {
            continue;
        }
        $href = $link->getAttribute('href');
        if ($href === '') {
            continue;
        }

        // The endpoint URL may or may not already carry a query string.
        $separator = ((strpos($href, '?') === false) ? '?' : '&');
        $x = Url::get($href . $separator . 'maxwidth=' . App::$videowidth);

        if (! $x['success']) {
            logger($log_prefix . $href);
            continue;
        }
        if ($x['body']) {
            return $x['body'];
        }
    }

    return '';
}
/**
 * Render the array returned by fetch_url() as HTML.
 *
 * Output depends on $j['type'] ('video', 'photo', 'link', 'pdf', 'rich');
 * any other type only gets the trailing source link. Values that come from
 * the remote oEmbed response (type, title, url, width, thumbnail_url,
 * author_name, provider_name) are HTML-escaped before being written into
 * markup. $j['html'] is emitted as-is.
 *
 * @param array $j oEmbed data including 'embedurl' and 'zrl'
 * @return string HTML with characters above 0x7F converted to numeric entities
 */
public static function format_object($j)
{
    $type = ((isset($j['type']) && is_scalar($j['type'])) ? (string) $j['type'] : '');
    $embedurl = ((isset($j['embedurl'])) ? $j['embedurl'] : '');
    $html = ((isset($j['html']) && is_string($j['html'])) ? $j['html'] : '');

    $jhtml = self::iframe($embedurl, (isset($j['width']) ? $j['width'] : null), (isset($j['height']) ? $j['height'] : null));

    $ret = "<span class='oembed " . self::escape_field($type) . "'>";
    // Several branches below replace $ret and thereby drop the opening span.
    // Track that, so the closing tag is only written when the span is open.
    $wrapped = true;

    switch ($type) {
        case 'video':
            if (isset($j['thumbnail_url'])) {
                $tw = ((isset($j['thumbnail_width'])) ? floatval($j['thumbnail_width']) : 200.0);
                $th = ((isset($j['thumbnail_height'])) ? floatval($j['thumbnail_height']) : 180.0);
                // Missing, zero or non-numeric dimensions would make the
                // ratio below meaningless (or divide by zero).
                if ($tw <= 0 || $th <= 0) {
                    $tw = 200.0;
                    $th = 180.0;
                }
                $tr = $tw / $th;
                $th = 120;
                $tw = $th * $tr;
                $tpl = Theme::get_template('oembed_video.tpl');
                $ret .= replace_macros($tpl, [
                    '$baseurl' => z_root(),
                    '$embedurl' => $embedurl,
                    '$escapedhtml' => base64_encode($jhtml),
                    '$tw' => $tw,
                    '$th' => $th,
                    '$turl' => $j['thumbnail_url'],
                ]);
            } else {
                $ret = $jhtml;
                $wrapped = false;
            }
            $ret .= "<br>";
            break;

        case 'photo':
            $ret .= "<img width='" . self::escape_field(isset($j['width']) ? $j['width'] : '')
                . "' src='" . self::escape_field(isset($j['url']) ? $j['url'] : '') . "'>";
            $ret .= "<br>";
            break;

        case 'link':
            if (! empty($j['thumbnail_url'])) {
                $thumbnail = $j['thumbnail_url'];
                if (isOWAEnabled($embedurl)) {
                    $embedurl = zid($embedurl);
                    $thumbnail = zid($thumbnail);
                }
                $ret = '<a href="' . self::escape_field($embedurl) . '" ><img src="'
                    . self::escape_field($thumbnail) . '" alt="thumbnail" /></a><br><br>';
                $wrapped = false;
            }
            break;

        case 'pdf':
            $ret = $html;
            $wrapped = false;
            break;

        case 'rich':
            if (! empty($j['zrl'])) {
                $ret = ((preg_match('/^<div[^>]+>(.*?)<\/div>$/is', $html, $o)) ? $o[1] : $html);
                $wrapped = false;
            } else {
                $ret .= $jhtml;
            }
            break;
    }

    $safe_url = self::escape_field($embedurl);

    // add link to source if not present in "rich" type
    // (compare against false: a match at offset 0 still counts as present)
    if ($type !== 'rich' || $embedurl === '' || strpos($html, $embedurl) === false) {
        $embedlink = ((isset($j['title'])) ? self::escape_field($j['title']) : $safe_url);
        $ret .= '<br>' . "<a href='$safe_url' rel='oembed'>$embedlink</a>";
        $ret .= "<br>";
        if (isset($j['author_name'])) {
            $ret .= t(' by ') . self::escape_field($j['author_name']);
        }
        if (isset($j['provider_name'])) {
            $ret .= t(' on ') . self::escape_field($j['provider_name']);
        }
    } else {
        // add <a> for html2bbcode conversion
        $ret .= "<br><a href='$safe_url' rel='oembed'>$safe_url</a>";
    }

    $ret .= "<br style='clear:left'>" . (($wrapped) ? '</span>' : '');

    // Numeric entities for everything outside ASCII. This replaces
    // mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated as of PHP 8.2.
    $encoding = (mb_detect_encoding($ret) ?: 'UTF-8');
    return mb_encode_numericentity($ret, [0x80, 0x10FFFF, 0, 0x1FFFFF], $encoding);
}

/**
 * Escape a value taken from an oEmbed response for use in HTML text or in
 * a quoted attribute. Existing entities are not encoded a second time.
 *
 * @param mixed $value
 * @return string the escaped value; '' for anything that is not a scalar
 */
private static function escape_field($value)
{
    if (! is_scalar($value)) {
        return '';
    }
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
}
/**
 * Build the iframe that loads an embed through this site's /oembed/ path.
 *
 * A missing or percentage width/height falls back to 640 / 300 and turns
 * scrolling on. 40 is then added to the width and 80 to the height.
 *
 * @param string $src the URL to embed; base64url encoded into the frame src
 * @param mixed $width requested width, may be empty or a percentage
 * @param mixed $height requested height, may be empty or a percentage
 * @return string the <iframe> element
 */
public static function iframe($src, $width, $height)
{
    $width_usable = ($width && ! strstr($width, '%'));
    $height_usable = ($height && ! strstr($height, '%'));

    $scroll = (($width_usable && $height_usable) ? ' scrolling="no" ' : ' scrolling="auto" ');

    // try and leave some room for the description line.
    $frame_height = intval(($height_usable) ? $height : '300') + 80;
    $frame_width = intval(($width_usable) ? $width : '640') + 40;

    $frame_src = z_root() . '/oembed/' . base64url_encode($src);

    // Make sure any children are sandboxed within their own iframe.
    $open_tag = '<iframe ' . ' style="max-width: 100%;" ' . $scroll
        . 'height="' . $frame_height . '" width="' . $frame_width
        . '" src="' . $frame_src . '" allowfullscreen frameborder="no" >';

    return $open_tag . t('Embedded content') . '</iframe>';
}
/**
 * Replace every [embed]url[/embed] tag in a text.
 *
 * When embeds are switched off (system.no_oembed) or $export is set, each
 * tag becomes a plain link; otherwise each tag is handed to replacecb().
 *
 * @param string $text
 * @param bool $export
 * @param string $target extra attribute text inserted into the plain link
 * @return string|null the result of preg_replace() / preg_replace_callback()
 */
public static function bbcode2html($text, $export = false, $target = '')
{
    $embed_tag = "/\[embed\](.+?)\[\/embed\]/is";
    $embeds_off = get_config("system", "no_oembed");

    if (! $embeds_off && ! $export) {
        return preg_replace_callback($embed_tag, ['\\Code\\Lib\\Oembed','replacecb'], $text);
    }

    $plain_link = '<a href="$1" ' . $target . ' rel="nofollow noopener" >$1</a>';
    return preg_replace($embed_tag, $plain_link, $text);
}
/**
 * Build an XPath predicate that is true when the whitespace separated token
 * list in attribute $attr (e.g. "class") contains the token $value.
 *
 * The attribute is normalised and padded with one space on each side, so a
 * single contains() covers the token at the start, in the middle, at the end
 * or on its own, whatever whitespace the document used between tokens.
 *
 * @param string $attr attribute name, without the leading '@'
 * @param string $value token to look for; must not contain a single quote
 * @return string predicate text for use inside [ ... ]
 */
public static function build_xpath($attr, $value)
{
    return "contains( concat( ' ', normalize-space( @$attr ), ' ' ), ' $value ' )";
}
/**
 * Serialise the children of a DOM node.
 *
 * @param mixed $node the node whose content is wanted
 * @return string each child node serialised with saveXML(), concatenated in order
 */
public static function get_inner_html($node)
{
    $markup = '';
    for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
        $markup .= $child->ownerDocument->saveXML($child);
    }
    return $markup;
}
/**
 * Find <span class='oembed'>..<a href='url' rel='oembed'>..</a></span>
 * and replace it with [embed]url[/embed]
 *
 * @param string $text HTML fragment
 * @return string the content of the parsed <body> with the spans replaced,
 *                or $text unchanged when it does not mention 'oembed' or
 *                cannot be parsed
 */
public static function html2bbcode($text)
{
    // start parser only if 'oembed' is in text
    if (strpos($text, 'oembed') === false) {
        return $text;
    }

    // convert non ascii chars to numeric entities so that loadHTML() does not
    // have to guess the encoding (mb_convert_encoding(..., 'HTML-ENTITIES')
    // is deprecated as of PHP 8.2)
    $encoding = (mb_detect_encoding($text) ?: 'UTF-8');
    $html_text = mb_encode_numericentity($text, [0x80, 0x10FFFF, 0, 0x1FFFFF], $encoding);

    // If it doesn't parse at all, just return the text.
    $dom = new DOMDocument();
    if (! @$dom->loadHTML($html_text)) {
        return $text;
    }

    $xpath = new DOMXPath($dom);
    $span_filter = self::build_xpath("class", "oembed");
    $entries = $xpath->query("//span[$span_filter]");

    foreach ($entries as $e) {
        // A span without a rel='oembed' link is left untouched.
        $href = $xpath->evaluate("a[@rel='oembed']/@href", $e)->item(0);
        if ($href === null || $e->parentNode === null) {
            continue;
        }
        // createTextNode() keeps this independent of which DOM classes the
        // file imports.
        $bbcode = $dom->createTextNode("[embed]" . $href->nodeValue . "[/embed]");
        $e->parentNode->replaceChild($bbcode, $e);
    }

    $body = $dom->getElementsByTagName("body")->item(0);
    if ($body === null) {
        return $text;
    }
    return self::get_inner_html($body);
}
}