mirror of
https://github.com/friendica/friendica
synced 2024-12-23 19:20:17 +00:00
Move "Network::finalUrl" to "HTTPRequest" class
This commit is contained in:
parent
57587efe58
commit
60e18736b0
3 changed files with 123 additions and 124 deletions
|
@ -21,10 +21,13 @@
|
||||||
|
|
||||||
namespace Friendica\Network;
|
namespace Friendica\Network;
|
||||||
|
|
||||||
|
use DOMDocument;
|
||||||
|
use DomXPath;
|
||||||
use Friendica\App;
|
use Friendica\App;
|
||||||
use Friendica\Core\Config\IConfig;
|
use Friendica\Core\Config\IConfig;
|
||||||
use Friendica\Core\Logger;
|
use Friendica\Core\Logger;
|
||||||
use Friendica\Core\System;
|
use Friendica\Core\System;
|
||||||
|
use Friendica\DI;
|
||||||
use Friendica\Util\Network;
|
use Friendica\Util\Network;
|
||||||
use Friendica\Util\Profiler;
|
use Friendica\Util\Profiler;
|
||||||
use Psr\Log\LoggerInterface;
|
use Psr\Log\LoggerInterface;
|
||||||
|
@ -323,6 +326,124 @@ class HTTPRequest
|
||||||
return $curlResponse;
|
return $curlResponse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the original URL of the provided URL
|
||||||
|
*
|
||||||
|
* This function strips tracking query params and follows redirections, either
|
||||||
|
* through HTTP code or meta refresh tags. Stops after 10 redirections.
|
||||||
|
*
|
||||||
|
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
|
||||||
|
*
|
||||||
|
* @see ParseUrl::getSiteinfo
|
||||||
|
*
|
||||||
|
* @param string $url A user-submitted URL
|
||||||
|
* @param int $depth The current redirection recursion level (internal)
|
||||||
|
* @param bool $fetchbody Wether to fetch the body or not after the HEAD requests
|
||||||
|
* @return string A canonical URL
|
||||||
|
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||||
|
*/
|
||||||
|
public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
|
||||||
|
{
|
||||||
|
$url = Network::stripTrackingQueryParams($url);
|
||||||
|
|
||||||
|
if ($depth > 10) {
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
|
$url = trim($url, "'");
|
||||||
|
|
||||||
|
$stamp1 = microtime(true);
|
||||||
|
|
||||||
|
$ch = curl_init();
|
||||||
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
|
curl_setopt($ch, CURLOPT_HEADER, 1);
|
||||||
|
curl_setopt($ch, CURLOPT_NOBODY, 1);
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
||||||
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
|
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
|
||||||
|
|
||||||
|
curl_exec($ch);
|
||||||
|
$curl_info = @curl_getinfo($ch);
|
||||||
|
$http_code = $curl_info['http_code'];
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
|
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
|
||||||
|
|
||||||
|
if ($http_code == 0) {
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in_array($http_code, ['301', '302'])) {
|
||||||
|
if (!empty($curl_info['redirect_url'])) {
|
||||||
|
return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
|
||||||
|
} elseif (!empty($curl_info['location'])) {
|
||||||
|
return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for redirects in the meta elements of the body if there are no redirects in the header.
|
||||||
|
if (!$fetchbody) {
|
||||||
|
return self::finalUrl($url, ++$depth, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if the file is too large then exit
|
||||||
|
if ($curl_info["download_content_length"] > 1000000) {
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if it isn't a HTML file then exit
|
||||||
|
if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
|
$stamp1 = microtime(true);
|
||||||
|
|
||||||
|
$ch = curl_init();
|
||||||
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
|
curl_setopt($ch, CURLOPT_HEADER, 0);
|
||||||
|
curl_setopt($ch, CURLOPT_NOBODY, 0);
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
||||||
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
|
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
|
||||||
|
|
||||||
|
$body = curl_exec($ch);
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
|
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
|
||||||
|
|
||||||
|
if (trim($body) == "") {
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for redirect in meta elements
|
||||||
|
$doc = new DOMDocument();
|
||||||
|
@$doc->loadHTML($body);
|
||||||
|
|
||||||
|
$xpath = new DomXPath($doc);
|
||||||
|
|
||||||
|
$list = $xpath->query("//meta[@content]");
|
||||||
|
foreach ($list as $node) {
|
||||||
|
$attr = [];
|
||||||
|
if ($node->attributes->length) {
|
||||||
|
foreach ($node->attributes as $attribute) {
|
||||||
|
$attr[$attribute->name] = $attribute->value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (@$attr["http-equiv"] == 'refresh') {
|
||||||
|
$path = $attr["content"];
|
||||||
|
$pathinfo = explode(";", $path);
|
||||||
|
foreach ($pathinfo as $value) {
|
||||||
|
if (substr(strtolower($value), 0, 4) == "url=") {
|
||||||
|
return self::finalUrl(substr($value, 4), ++$depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Curl wrapper
|
* Curl wrapper
|
||||||
*
|
*
|
||||||
|
|
|
@ -35,6 +35,7 @@ use Friendica\Model\Contact;
|
||||||
use Friendica\Model\Item;
|
use Friendica\Model\Item;
|
||||||
use Friendica\Model\Tag;
|
use Friendica\Model\Tag;
|
||||||
use Friendica\Model\User;
|
use Friendica\Model\User;
|
||||||
|
use Friendica\Network\HTTPRequest;
|
||||||
use Friendica\Util\DateTimeFormat;
|
use Friendica\Util\DateTimeFormat;
|
||||||
use Friendica\Util\Network;
|
use Friendica\Util\Network;
|
||||||
use Friendica\Util\ParseUrl;
|
use Friendica\Util\ParseUrl;
|
||||||
|
@ -350,7 +351,7 @@ class Feed
|
||||||
|
|
||||||
$orig_plink = $item["plink"];
|
$orig_plink = $item["plink"];
|
||||||
|
|
||||||
$item["plink"] = Network::finalUrl($item["plink"]);
|
$item["plink"] = HTTPRequest::finalUrl($item["plink"]);
|
||||||
|
|
||||||
$item["parent-uri"] = $item["uri"];
|
$item["parent-uri"] = $item["uri"];
|
||||||
|
|
||||||
|
|
|
@ -21,11 +21,8 @@
|
||||||
|
|
||||||
namespace Friendica\Util;
|
namespace Friendica\Util;
|
||||||
|
|
||||||
use DOMDocument;
|
|
||||||
use DomXPath;
|
|
||||||
use Friendica\Core\Hook;
|
use Friendica\Core\Hook;
|
||||||
use Friendica\Core\Logger;
|
use Friendica\Core\Logger;
|
||||||
use Friendica\Core\System;
|
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
|
|
||||||
class Network
|
class Network
|
||||||
|
@ -314,126 +311,6 @@ class Network
|
||||||
return self::unparseURL($parts);
|
return self::unparseURL($parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the original URL of the provided URL
|
|
||||||
*
|
|
||||||
* This function strips tracking query params and follows redirections, either
|
|
||||||
* through HTTP code or meta refresh tags. Stops after 10 redirections.
|
|
||||||
*
|
|
||||||
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
|
|
||||||
*
|
|
||||||
* @see ParseUrl::getSiteinfo
|
|
||||||
*
|
|
||||||
* @param string $url A user-submitted URL
|
|
||||||
* @param int $depth The current redirection recursion level (internal)
|
|
||||||
* @param bool $fetchbody Wether to fetch the body or not after the HEAD requests
|
|
||||||
* @return string A canonical URL
|
|
||||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
|
||||||
*/
|
|
||||||
public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
|
|
||||||
{
|
|
||||||
$a = DI::app();
|
|
||||||
|
|
||||||
$url = self::stripTrackingQueryParams($url);
|
|
||||||
|
|
||||||
if ($depth > 10) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
$url = trim($url, "'");
|
|
||||||
|
|
||||||
$stamp1 = microtime(true);
|
|
||||||
|
|
||||||
$ch = curl_init();
|
|
||||||
curl_setopt($ch, CURLOPT_URL, $url);
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 1);
|
|
||||||
curl_setopt($ch, CURLOPT_NOBODY, 1);
|
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
|
|
||||||
|
|
||||||
curl_exec($ch);
|
|
||||||
$curl_info = @curl_getinfo($ch);
|
|
||||||
$http_code = $curl_info['http_code'];
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
|
|
||||||
|
|
||||||
if ($http_code == 0) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in_array($http_code, ['301', '302'])) {
|
|
||||||
if (!empty($curl_info['redirect_url'])) {
|
|
||||||
return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
|
|
||||||
} elseif (!empty($curl_info['location'])) {
|
|
||||||
return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for redirects in the meta elements of the body if there are no redirects in the header.
|
|
||||||
if (!$fetchbody) {
|
|
||||||
return(self::finalUrl($url, ++$depth, true));
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the file is too large then exit
|
|
||||||
if ($curl_info["download_content_length"] > 1000000) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if it isn't a HTML file then exit
|
|
||||||
if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
$stamp1 = microtime(true);
|
|
||||||
|
|
||||||
$ch = curl_init();
|
|
||||||
curl_setopt($ch, CURLOPT_URL, $url);
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
|
||||||
curl_setopt($ch, CURLOPT_NOBODY, 0);
|
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
|
|
||||||
|
|
||||||
$body = curl_exec($ch);
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
|
|
||||||
|
|
||||||
if (trim($body) == "") {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for redirect in meta elements
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
@$doc->loadHTML($body);
|
|
||||||
|
|
||||||
$xpath = new DomXPath($doc);
|
|
||||||
|
|
||||||
$list = $xpath->query("//meta[@content]");
|
|
||||||
foreach ($list as $node) {
|
|
||||||
$attr = [];
|
|
||||||
if ($node->attributes->length) {
|
|
||||||
foreach ($node->attributes as $attribute) {
|
|
||||||
$attr[$attribute->name] = $attribute->value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (@$attr["http-equiv"] == 'refresh') {
|
|
||||||
$path = $attr["content"];
|
|
||||||
$pathinfo = explode(";", $path);
|
|
||||||
foreach ($pathinfo as $value) {
|
|
||||||
if (substr(strtolower($value), 0, 4) == "url=") {
|
|
||||||
return self::finalUrl(substr($value, 4), ++$depth);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the matching part between two url
|
* Find the matching part between two url
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in a new issue