2016-04-19 03:38:38 +00:00
< ? php
2021-12-03 03:01:39 +00:00
2022-02-16 04:08:28 +00:00
namespace Code\Module ;
2016-04-19 03:38:38 +00:00
2019-04-19 21:34:47 +00:00
use App ;
2021-12-02 22:33:36 +00:00
use DOMDocument ;
use DomXPath ;
2022-02-16 04:08:28 +00:00
use Code\Web\Controller ;
use Code\Lib\Activity ;
use Code\Lib\ActivityStreams ;
use Code\Lib\Libzot ;
use Code\Lib\Channel ;
use Code\Lib\Oembed ;
2022-06-23 07:38:34 +00:00
use Code\Lib\Url ;
2022-07-12 02:39:31 +00:00
use Code\Lib\System ;
2022-02-16 04:08:28 +00:00
use Code\Lib as Zlib ;
use Code\Extend\Hook ;
2016-04-19 03:38:38 +00:00
2019-04-27 04:01:00 +00:00
require_once ( 'include/security.php' );
2016-04-19 03:38:38 +00:00
2021-12-02 23:02:31 +00:00
class Linkinfo extends Controller
{
2022-07-12 02:39:31 +00:00
protected $ident ;
2021-12-02 23:02:31 +00:00
/**
 * Turn a pasted link into bbcode and echo it back to the caller.
 *
 * Request parameters read from $_GET:
 *   - binurl:      hex-encoded URL (used in preference to url)
 *   - url:         the link; a leading '!' disables all embedding
 *   - title:       optional title override
 *   - description: optional description override
 *   - tags:        optional CSV list, each entry is emitted as a #hashtag
 *   - oembed:      truthy to allow oEmbed processing
 *
 * Every path writes its result with echo and then calls killme();
 * nothing is returned.
 */
public function get()
{
    logger('linkinfo: ' . print_r($_REQUEST, true), LOGGER_DEBUG);

    // Google/YouTube does User-agent sniffing and will send you bad data if
    // you don't supply a compatibility identifier in the UA string.
    $ident = System::get_project_name();
    $this->ident = "Mozilla/5.0 (compatible; $ident)";

    $br = "\n";
    $text = null;
    $title = '';
    $type = '';
    $str_tags = '';
    $process_embed = true;
    $process_oembed = !empty($_GET['oembed']);
    $process_zotobj = true;

    // Remember the caller's oembed choice when it differs from the stored one.
    if (local_channel()) {
        $saved_oembed = (bool) get_pconfig(local_channel(), 'system', 'linkinfo_embed', true);
        if ($saved_oembed !== $process_oembed) {
            set_pconfig(local_channel(), 'system', 'linkinfo_embed', intval($process_oembed));
        }
    }

    if (x($_GET, 'binurl')) {
        // hex2bin() warns and returns false on odd-length or non-hex input,
        // so validate first and fall back to an empty URL.
        $hex = is_string($_GET['binurl']) ? $_GET['binurl'] : '';
        $url = (preg_match('/^(?:[0-9a-fA-F]{2})+$/', $hex) === 1) ? trim(hex2bin($hex)) : '';
    } else {
        $url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
    }

    if (str_starts_with($url, '!')) {
        $process_embed = false;
        $url = substr($url, 1);
    }

    $url = strip_zids($url);

    if (str_starts_with($url, 'geo:')) {
        if ($process_embed) {
            echo $br . '[map=' . substr($url, 4) . ']' . $br;
        } else {
            echo $br . '[url]' . $url . '[/url]' . $br;
        }
        killme();
    }

    $is_tel = str_starts_with($url, 'tel:');
    $phone_number = $is_tel ? false : is_phone_number($url);
    if ($is_tel || $phone_number !== false) {
        $phone = $url;
        if (!$is_tel) {
            $url = 'tel:' . $phone_number;
        }
        echo $br . '[url=' . $url . ']' . $phone . '[/url]' . $br;
        killme();
    }

    // parse_url() may return false or omit 'scheme'; empty() covers both.
    $m = parse_url($url);
    if (empty($m['scheme'])) {
        // Truthiness test kept on purpose: an '@' at position 0 is not
        // treated as an address.
        if (strpos($url, '@')) {
            $xc = discover_resource($url);
            if ($xc) {
                $x = q(
                    "select * from xchan where xchan_hash = '%s'",
                    dbesc($xc)
                );
                if ($x) {
                    $url = $x[0]['xchan_url'];
                }
            } else {
                echo $br . '[url=mailto:' . $url . ']' . $url . '[/url]' . $br;
                killme();
            }
        } else {
            $url = 'http://' . $url;
        }
    }

    if (!empty($_GET['title'])) {
        $title = strip_tags(trim($_GET['title']));
    }

    if (!empty($_GET['description'])) {
        $text = strip_tags(trim($_GET['description']));
    }

    if (!empty($_GET['tags'])) {
        // Arguments after the input are the documented defaults, spelled
        // out because relying on the default escape is deprecated in 8.4.
        $arr_tags = str_getcsv($_GET['tags'], ',', '"', '\\');
        if (count($arr_tags)) {
            // Array callable: the 'self::method' string form is deprecated in PHP 8.2.
            array_walk($arr_tags, [self::class, 'arr_add_hashes']);
            $str_tags = $br . implode(' ', $arr_tags) . $br;
        }
    }

    logger('linkinfo: ' . $url, LOGGER_DEBUG);

    $zrl = isOWAEnabled($url);

    if (!$process_embed) {
        if ($zrl) {
            echo $br . '[zrl]' . $url . '[/zrl]' . $br;
        } else {
            echo $br . '[url]' . $url . '[/url]' . $br;
        }
        killme();
    }

    // Header-only probe ('nobody') to learn the content type before
    // deciding how to render the link.
    $result = Url::get($url, ['novalidate' => true, 'nobody' => true, 'useragent' => $this->ident]);
    if ($result['success']) {
        $hdrs = [];
        foreach (explode("\n", $result['header'] ?? '') as $line) {
            $line = trim($line);
            // Status lines and blank separators carry no "name: value" pair.
            if (!str_contains($line, ':')) {
                continue;
            }
            [$k, $v] = array_map('trim', explode(':', $line, 2));
            $hdrs[strtolower($k)] = $v;
        }

        if (array_key_exists('content-type', $hdrs)) {
            $type = $hdrs['content-type'];
        }

        if ($type) {
            // Compare on the bare media type so that parameters such as
            // "; charset=utf-8" and letter case do not defeat the matches.
            $mime = strtolower(trim(explode(';', $type, 2)[0]));

            if (str_starts_with($mime, 'image/')) {
                $basename = basename($url);
                if ($zrl) {
                    echo $br . '[zmg alt="' . $basename . '"]' . $url . '[/zmg]' . $br;
                } else {
                    echo $br . '[img alt="' . $basename . '"]' . $url . '[/img]' . $br;
                }
                killme();
            }

            if (str_starts_with($mime, 'video/') || $mime === 'application/ogg') {
                $thumb = self::get_video_poster($url);
                if ($thumb) {
                    if ($zrl) {
                        echo $br . '[zvideo poster=\'' . $thumb . '\']' . $url . '[/zvideo]' . $br;
                    } else {
                        echo $br . '[video poster=\'' . $thumb . '\']' . $url . '[/video]' . $br;
                    }
                    killme();
                }
                if ($zrl) {
                    echo $br . '[zvideo]' . $url . '[/zvideo]' . $br;
                } else {
                    echo $br . '[video]' . $url . '[/video]' . $br;
                }
                killme();
            }

            if (str_starts_with($mime, 'audio/')) {
                if ($zrl) {
                    echo $br . '[zaudio]' . $url . '[/zaudio]' . $br;
                } else {
                    echo $br . '[audio]' . $url . '[/audio]' . $br;
                }
                killme();
            }

            if ($mime === 'text/calendar') {
                $content = Url::get($url, ['novalidate' => true, 'useragent' => $this->ident]);
                if ($content['success']) {
                    $ev = ical_to_ev($content['body']);
                    if ($ev) {
                        echo $br . format_event_bbcode($ev[0]) . $br;
                        killme();
                    }
                }
                // An unparseable calendar falls through to the generic handling.
            }

            if ($mime === 'application/pdf' || $mime === 'application/x-pdf') {
                echo $br . '[embed]' . $url . '[/embed]' . $br;
                killme();
            }
        }
    }

    $template = $br . '[url=%s]%s[/url]%s' . $br;

    // Give plugins the first chance to render the link.
    $arr = ['url' => $url, 'text' => ''];
    Hook::call('parse_link', $arr);

    if (strlen($arr['text'])) {
        echo $arr['text'];
        killme();
    }

    if ($process_zotobj) {
        $x = Activity::fetch($url, App::get_channel());
        $y = null;
        if (is_array($x)) {
            if (ActivityStreams::is_an_actor($x['type'] ?? null) && !empty($x['id'])) {
                if (check_siteallowed($x['id']) && check_channelallowed($x['id'])) {
                    $url = $x['url'] ?? '';
                    if (is_array($url)) {
                        $url = $url[0]['href'];
                    }
                    $username = $x['preferredUsername'] ?? '';
                    $name = !empty($x['name']) ? $x['name'] . ' (' . $username . ')' : $username;

                    if (array_path_exists('icon/url', $x)) {
                        $text = $br . $br . '[zrl=' . $url . '][zmg=300x300]' . $x['icon']['url'] . '[/zmg][/zrl]';
                    }
                    $text .= $br . $br . '[zrl=' . $url . ']' . $name . '[/zrl]' . $br . $br;
                    echo $text;
                    killme();
                }
            } else {
                $y = new ActivityStreams($x);
                if (
                    $y->is_valid() && $y->type === 'Announce' && is_array($y->obj)
                    && array_key_exists('object', $y->obj) && array_key_exists('actor', $y->obj)
                ) {
                    // This is a relayed/forwarded Activity (as opposed to a shared/boosted object)
                    // Reparse the encapsulated Activity and use that instead
                    logger('relayed activity', LOGGER_DEBUG);
                    $y = new ActivityStreams($y->obj);
                }
            }

            if ($y && $y->is_valid()) {
                $z = Activity::decode_note($y);
                $actor_id = is_array($y->actor) ? $y->actor['id'] : $y->actor;
                $r = q(
                    "select hubloc_hash, hubloc_network, hubloc_id_url, hubloc_url from hubloc where hubloc_deleted = 0 and ( hubloc_hash = '%s' OR hubloc_id_url = '%s') order by hubloc_id desc",
                    dbesc($actor_id),
                    dbesc($actor_id)
                );

                if ($r) {
                    $r = Libzot::zot_record_preferred($r);
                    if ($z) {
                        $z['author_xchan'] = $r['hubloc_hash'];
                    }
                }

                if ($z) {
                    // do not allow somebody to embed a post that was blocked by the site admin
                    // We *will* let them over-rule any blocks they created themselves
                    // ($r is empty when the author has no hubloc; ?? keeps that case quiet.)
                    if (check_siteallowed($r['hubloc_id_url'] ?? null) && check_channelallowed($z['author_xchan'] ?? null)) {
                        $s = new Zlib\Share($z);
                        echo $s->bbcode();
                        echo "\n" . '[attachment]' . $z['mid'] . '[/attachment]' . "\n";
                        killme();
                    }
                }
            }
        }
    }

    if ($process_oembed) {
        $x = Oembed::process($url);
        if ($x) {
            echo $x;
            killme();
        }
    }

    // Caller supplied both title and description: no need to scrape the page.
    if ($url && $title && $text) {
        $text = $br . '[quote]' . trim($text) . '[/quote]' . $br;
        $title = str_replace(["\r", "\n"], '', $title);

        $result = sprintf($template, $url, $title ?: $url, $text) . $str_tags;

        logger('linkinfo (unparsed): returns: ' . $result);

        echo $result;
        killme();
    }

    $siteinfo = self::parseurl_getsiteinfo($url, $this->ident);

    // If the site uses this platform, use zrl rather than url so they get zids sent to them by default
    // ($zrl was computed for this same URL above; no need to probe again.)
    if ($zrl) {
        $template = str_replace('url', 'zrl', $template);
    }

    if (($siteinfo['title'] ?? '') == '') {
        echo sprintf($template, $url, $url, '') . $str_tags;
        killme();
    } else {
        $text = $siteinfo['text'] ?? '';
        $title = $siteinfo['title'];
    }

    $image = '';

    if (isset($siteinfo['images']) && is_array($siteinfo['images']) && count($siteinfo['images'])) {
        /* Execute below code only if image is present in siteinfo */

        $total_images = 0;
        $max_images = get_config('system', 'max_bookmark_images');
        // Unset => default of 2. NOTE(review): null is also treated as
        // "unset" here in case get_config() reports absence that way.
        if ($max_images === false || $max_images === null) {
            $max_images = 2;
        } else {
            $max_images = intval($max_images);
        }

        foreach ($siteinfo['images'] as $imagedata) {
            if ($url) {
                $image .= sprintf('[url=%s]', $url);
            }
            $image .= '[img=' . $imagedata['width'] . 'x' . $imagedata['height'] . ']' . $imagedata['src'] . '[/img]';
            if ($url) {
                $image .= '[/url]';
            }
            $image .= "\n";
            $total_images++;
            // Stop once the cap is reached (0 means no cap). The comparison
            // was previously reversed and always stopped after one image.
            if ($max_images && $total_images >= $max_images) {
                break;
            }
        }
    }

    if (strlen((string) $text)) {
        $text = $br . '[quote]' . trim($text) . '[/quote]' . $br;
    }

    if ($image) {
        $text = $br . $br . $image . $text;
    }

    $title = str_replace(["\r", "\n"], '', $title);

    $result = sprintf($template, $url, $title ?: $url, $text) . $str_tags;

    logger('linkinfo: returns: ' . $result, LOGGER_DEBUG);

    echo trim($result);
    killme();
}
2022-09-04 01:35:50 +00:00
/**
 * Remove every element with the given tag name from a DOM document.
 *
 * @param DOMDocument $doc  document to modify in place
 * @param string      $node element name; matched anywhere via "//<name>"
 */
public static function deletexnode($doc, $node)
{
    $matches = (new DomXPath($doc))->query('//' . $node);

    foreach ($matches as $element) {
        $element->parentNode->removeChild($element);
    }
}
/**
 * Make a possibly relative URL absolute using a base URL.
 *
 * - A URL that already has a scheme is returned unchanged.
 * - A protocol-relative URL ("//host/path") keeps its own host and port
 *   and only borrows the scheme from the base.
 * - Anything else gets the scheme, host and port of the base. The path is
 *   attached directly below the base host; it is NOT resolved against the
 *   directory of the base path.
 *
 * @param string $url    URL to complete
 * @param string $scheme absolute base URL that supplies the missing parts
 * @return string the completed URL, or $url unchanged when the base has
 *                no scheme or host to borrow
 */
public static function completeurl($url, $scheme)
{
    // parse_url() returns false on seriously malformed input.
    $urlarr = parse_url($url) ?: [];
    if (isset($urlarr['scheme'])) {
        return $url;
    }

    $schemearr = parse_url($scheme) ?: [];
    if (empty($schemearr['scheme']) || empty($schemearr['host'])) {
        return $url;
    }

    if (isset($urlarr['host'])) {
        // Protocol-relative reference: the authority comes from the URL itself.
        $host = $urlarr['host'];
        $port = $urlarr['port'] ?? null;
    } else {
        $host = $schemearr['host'];
        $port = $schemearr['port'] ?? null;
    }

    $complete = $schemearr['scheme'] . '://' . $host;
    if ($port !== null) {
        $complete .= ':' . $port;
    }

    $path = $urlarr['path'] ?? '';
    if (!str_starts_with($path, '/')) {
        $complete .= '/';
    }
    $complete .= $path;

    if (($urlarr['query'] ?? '') != '') {
        $complete .= '?' . $urlarr['query'];
    }

    if (($urlarr['fragment'] ?? '') != '') {
        $complete .= '#' . $urlarr['fragment'];
    }

    return $complete;
}
/**
 * Find the poster URL for a video stored under this site's /cloud/ tree.
 *
 * The URL path is expected to look like <site path>/cloud/<nick>/<display path>.
 * The attach table is searched for that display path under the channel
 * named <nick>, with the extra SQL from permissions_sql() appended. A
 * poster URL is returned only when a "<content>.thumb" file exists.
 *
 * @param string $url URL of the video
 * @return string z_root() . '/poster/<nick>/<hash>' on success, otherwise EMPTY_STR
 */
public static function get_video_poster($url)
{
    if (!str_contains($url, z_root() . '/cloud/')) {
        return EMPTY_STR;
    }

    $path = parse_url($url, PHP_URL_PATH);
    if (!is_string($path)) {
        return EMPTY_STR;
    }

    // Derive the prefix from the site root instead of assuming it is the
    // seven characters of "/cloud/", so a sub-directory install also works.
    $prefix = rtrim((string) parse_url(z_root(), PHP_URL_PATH), '/') . '/cloud/';
    if (!str_starts_with($path, $prefix)) {
        return EMPTY_STR;
    }

    // What remains is "<nick>/<display path>".
    $parts = explode('/', substr($path, strlen($prefix)), 2);
    if (count($parts) < 2 || $parts[0] === '' || $parts[1] === '') {
        return EMPTY_STR;
    }
    [$nick, $p] = $parts;

    // get the channel to check permissions
    $u = Channel::from_username($nick);
    if (!$u) {
        return EMPTY_STR;
    }

    $sql_extra = permissions_sql(intval($u['channel_id']));

    $r = q(
        "select hash, content from attach where display_path = '%s' and uid = %d and os_storage = 1 $sql_extra limit 1",
        dbesc($p),
        intval($u['channel_id'])
    );

    if ($r) {
        $stored = dbunescbin($r[0]['content']);
        // Warnings stay suppressed, as before; a failed check just means no poster.
        if ($stored && @file_exists($stored . '.thumb')) {
            return z_root() . '/poster/' . $nick . '/' . $r[0]['hash'];
        }
    }

    return EMPTY_STR;
}
2022-07-12 02:39:31 +00:00
/**
 * Fetch a page and extract link-preview information from its HTML.
 *
 * Keys are only present in the result when a value was found:
 *   - title:  <title>, overridden by fulltitle / twitter:title / dc.title / og:title
 *   - text:   description meta tags, else up to 350 characters of body
 *             text followed by '...'
 *   - type:   twitter:card value
 *   - images: list of ['src', 'width', 'height', 'alt']
 *   - image:  left behind only when a meta image tag was present but empty
 *
 * @param string $url   page to fetch
 * @param string $ident User-Agent string sent with the request
 * @return array extracted data; empty when the fetch or the parse failed
 */
public static function parseurl_getsiteinfo($url, $ident)
{
    $siteinfo = [];

    $result = Url::get($url, ['novalidate' => true, 'useragent' => $ident]);
    if (!$result['success']) {
        return $siteinfo;
    }

    $header = $result['header'];
    $body = $result['body'];

    // Check codepage in HTTP headers or HTML if not exist
    $cp = (preg_match('/Content-Type: text\/html; charset=(.+)\r\n/i', $header, $o) ? $o[1] : '');
    if (empty($cp)) {
        $cp = (preg_match('/meta.+content=["|\']text\/html; charset=([^"|\']+)/i', $body, $o) ? $o[1] : 'AUTO');
    }
    $cp = trim($cp, " \t\"'");

    try {
        $body = mb_convert_encoding($body, 'UTF-8', $cp);
    } catch (\ValueError $e) {
        // The page announced a charset mbstring does not know; let it guess.
        $body = mb_convert_encoding($body, 'UTF-8', 'AUTO');
    }

    if (!$body) {
        return $siteinfo;
    }

    // Turn non-ASCII characters into numeric entities so the DOM parser is
    // independent of the source encoding. This replaces the 'HTML-ENTITIES'
    // mbstring target, which is deprecated as of PHP 8.2.
    $body = mb_encode_numericentity($body, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    // Malformed markup is reported as libxml warnings, not exceptions;
    // collect them internally instead of emitting one per problem.
    $previous = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($body);
    } catch (\Exception $e) {
        $loaded = false;
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
    if (!$loaded) {
        return $siteinfo;
    }

    // Drop elements whose text must not leak into the title/text extraction.
    foreach (['style', 'script', 'option', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul'] as $tag) {
        self::deletexnode($doc, $tag);
    }

    $xpath = new DomXPath($doc);

    // When several <title> elements exist the last one wins.
    foreach ($xpath->query('//title') as $node) {
        $siteinfo['title'] = html_entity_decode($node->nodeValue, ENT_QUOTES, 'UTF-8');
    }

    foreach ($xpath->query('//meta[@name]') as $node) {
        $attr = self::node_attributes($node);
        $content = html_entity_decode($attr['content'] ?? '', ENT_QUOTES, 'UTF-8');

        switch (strtolower($attr['name'] ?? '')) {
            case 'fulltitle':
            case 'twitter:title':
            case 'dc.title':
                $siteinfo['title'] = trim($content);
                break;
            case 'description':
            case 'twitter:description':
            case 'dc.description':
                $siteinfo['text'] = trim($content);
                break;
            case 'thumbnail':
            case 'twitter:image':
            case 'twitter:image:src':
                $siteinfo['image'] = $content;
                break;
            case 'twitter:card':
                // The first card type is kept unless a later one is "photo".
                if ((($siteinfo['type'] ?? '') == '') || ($content == 'photo')) {
                    $siteinfo['type'] = $content;
                }
                break;
        }
    }

    foreach ($xpath->query('//meta[@property]') as $node) {
        $attr = self::node_attributes($node);
        $content = html_entity_decode($attr['content'] ?? '', ENT_QUOTES, 'UTF-8');

        switch (strtolower($attr['property'] ?? '')) {
            case 'og:image':
                $siteinfo['image'] = $content;
                break;
            case 'og:image:alt':
                $siteinfo['image_alt'] = $content;
                break;
            case 'og:title':
                $siteinfo['title'] = $content;
                break;
            case 'og:description':
                $siteinfo['text'] = $content;
                break;
        }
    }

    if (($siteinfo['image'] ?? '') == '') {
        // No image announced in the metadata: consider every <img> on the page.
        foreach ($xpath->query('//img[@src]') as $node) {
            $attr = self::node_attributes($node);
            $src = self::completeurl($attr['src'] ?? '', $url);
            $alt = $attr['alt'] ?? '';

            // Best effort: an unreachable or non-image source is skipped.
            $photodata = @getimagesize($src);

            if ($photodata && ($photodata[0] > 150) && ($photodata[1] > 150)) {
                // Scale down proportionally so neither side exceeds 300.
                if ($photodata[0] > 300) {
                    $photodata[1] = round($photodata[1] * (300 / $photodata[0]));
                    $photodata[0] = 300;
                }
                if ($photodata[1] > 300) {
                    $photodata[0] = round($photodata[0] * (300 / $photodata[1]));
                    $photodata[1] = 300;
                }
                $siteinfo['images'][] = [
                    'src' => $src,
                    'width' => $photodata[0],
                    'height' => $photodata[1],
                    'alt' => $alt,
                ];
            }
        }
    } else {
        $src = self::completeurl($siteinfo['image'], $url);
        $alt = $siteinfo['image_alt'] ?? '';
        unset($siteinfo['image'], $siteinfo['image_alt']);

        $photodata = @getimagesize($src);

        if ($photodata && ($photodata[0] > 10) && ($photodata[1] > 10)) {
            $siteinfo['images'][] = [
                'src' => $src,
                'width' => $photodata[0],
                'height' => $photodata[1],
                'alt' => $alt,
            ];
        }
    }

    if (($siteinfo['text'] ?? '') == '') {
        $text = '';

        // Try progressively more generic containers; the first query that
        // yields any text wins. Paragraphs are the last resort.
        foreach (["//div[@class='article']", "//div[@class='content']", '//p'] as $query) {
            foreach ($xpath->query($query) as $node) {
                if (strlen($node->nodeValue) > 40) {
                    $text .= ' ' . trim($node->nodeValue);
                }
            }
            if ($text != '') {
                break;
            }
        }

        if ($text != '') {
            $text = trim(str_replace(["\n", "\r"], ' ', $text));
            // Collapse runs of spaces into a single space.
            $text = trim(preg_replace('/ {2,}/', ' ', $text));

            // Cut on characters, not bytes, so a multi-byte UTF-8 sequence
            // is never split in the middle.
            $siteinfo['text'] = html_entity_decode(mb_substr($text, 0, 350, 'UTF-8'), ENT_QUOTES, 'UTF-8') . '...';
        }
    }

    return $siteinfo;
}

/**
 * Collect the attributes of a DOM node into a name => value array.
 *
 * @param DOMNode $node node to inspect
 * @return array attribute values keyed by attribute name; empty when the node has none
 */
private static function node_attributes($node)
{
    $attr = [];
    if ($node->attributes && $node->attributes->length) {
        foreach ($node->attributes as $attribute) {
            $attr[$attribute->name] = $attribute->value;
        }
    }
    return $attr;
}
/**
 * array_walk() callback: prefix a tag with '#' unless it already starts with one.
 *
 * @param string $item tag text, modified in place
 * @param mixed  $k    array key passed by array_walk(); not used
 */
private static function arr_add_hashes(&$item, $k)
{
    if (str_starts_with($item, '#')) {
        return;
    }

    $item = '#' . $item;
}
2016-04-19 03:38:38 +00:00
}