streams/vendor/indieweb/rel-me/RelMe.php

227 lines
6.7 KiB
PHP
Raw Normal View History

2018-09-20 02:30:39 +00:00
<?php
namespace IndieWeb;
use DOMDocument;
use DOMXPath;
use Mf2; // for relative URL resolver
// force loading of resolveUrl function
class_exists('Mf2\Parser');
/**
 * Parse a raw HTTP header block into an array.
 *
 * Adapted from php.net, with a TESTING flag and title-casing of header names:
 * the first letter of every dash-separated segment is upper-cased, so
 * "content-type" becomes "Content-Type" (other letters are left untouched).
 *
 * Shape of the result:
 *  - a header seen once maps to its trimmed string value;
 *  - a header seen several times maps to an array of its values, in order;
 *  - a line without a colon that appears before the first header (normally the
 *    status line) is stored under the integer key 0;
 *  - a folded continuation line (one starting with a tab or a space) is
 *    appended to the most recent header value, joined with "\r\n\t".
 *
 * @param string $raw_headers the raw header block, lines separated by "\n" or "\r\n"
 * @return array
 */
if (!function_exists('http_parse_headers') or defined('TESTING')) {
    function http_parse_headers($raw_headers) {
        $headers = array();
        $key = '';

        foreach (explode("\n", (string) $raw_headers) as $line) {
            $firstChar = substr($line, 0, 1);
            $content = trim($line);

            // Folded lines must be recognised BEFORE splitting on ":" because
            // the folded content may itself contain a colon (e.g. a URL).
            if ($key !== '' && ($firstChar === "\t" || $firstChar === ' ')) {
                if ($content === '') {
                    continue;
                }

                if (is_array($headers[$key])) {
                    // Repeated header: the fold belongs to the latest value only.
                    $last = count($headers[$key]) - 1;
                    $headers[$key][$last] .= "\r\n\t" . $content;
                } else {
                    $headers[$key] .= "\r\n\t" . $content;
                }
                continue;
            }

            $parts = explode(':', $line, 2);

            if (isset($parts[1])) {
                // A "name: value" line.
                $headerName = implode('-', array_map('ucfirst', explode('-', trim($parts[0]))));
                $value = trim($parts[1]);

                if (!isset($headers[$headerName])) {
                    $headers[$headerName] = $value;
                } elseif (is_array($headers[$headerName])) {
                    $headers[$headerName][] = $value;
                } else {
                    $headers[$headerName] = array($headers[$headerName], $value);
                }

                $key = $headerName;
            } elseif ($key === '') {
                // No header seen yet: this is the status line (or leading noise).
                $headers[0] = $content;
            }
        }

        return $headers;
    }
}
/**
 * Unparse URL
 *
 * Turns an associative array shaped like the result of parse_url() back into
 * a URL string.
 *
 * Adapted from http://www.php.net/manual/en/function.parse-url.php#106731
 *
 * Notes:
 *  - scheme and host are lower-cased so that comparisons are not tripped up by
 *    case differences;
 *  - a missing path is emitted as "/".
 *
 * @param array $parsed_url components as produced by parse_url()
 * @return string
 */
function unparseUrl(array $parsed_url) {
    $username = isset($parsed_url['user']) ? $parsed_url['user'] : '';
    $password = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';

    $result = '';

    if (isset($parsed_url['scheme'])) {
        $result .= strtolower($parsed_url['scheme']) . '://';
    }

    // Credentials: "user", optionally followed by ":pass", closed by "@".
    $result .= $username;
    if ($username || $password) {
        $result .= $password . '@';
    }

    if (isset($parsed_url['host'])) {
        $result .= strtolower($parsed_url['host']);
    }

    if (isset($parsed_url['port'])) {
        $result .= ':' . $parsed_url['port'];
    }

    if (isset($parsed_url['path'])) {
        $result .= $parsed_url['path'];
    } else {
        $result .= '/';
    }

    if (isset($parsed_url['query'])) {
        $result .= '?' . $parsed_url['query'];
    }

    if (isset($parsed_url['fragment'])) {
        $result .= '#' . $parsed_url['fragment'];
    }

    return $result;
}
/**
 * Normalise URL
 *
 * Round-trips a URL through parse_url() and unparseUrl(), which lower-cases
 * the scheme and host and adds a "/" path when none is present.
 *
 * @param string|null $url
 * @return string|null the normalised URL, or null when $url is null or cannot
 *                     be parsed
 */
function normaliseUrl($url) {
    if ($url === null) {
        return null;
    }

    $components = parse_url($url);

    // parse_url() returns false for seriously malformed URLs; handing that to
    // unparseUrl(array ...) would raise a type error, so report "no URL".
    if (!is_array($components)) {
        return null;
    }

    return unparseUrl($components);
}
/**
 * HTTP GET
 *
 * Fetches $url with cURL and splits the response into body and headers.
 *
 * @param string $url
 * @return array [string body, array headers (see http_parse_headers), array curl info]
 */
function httpGet($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    $response = curl_exec($ch);
    $info = curl_getinfo($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; treat that as an empty response.
    if ($response === false) {
        $response = '';
    }

    // header_size is a BYTE count, so the split must be byte-wise (substr),
    // not character-wise (mb_substr), or multibyte content shifts the boundary.
    $headerSize = (int) $info['header_size'];
    $rawHeaders = (string) substr($response, 0, $headerSize);
    $body = (string) substr($response, $headerSize);

    $headers = http_parse_headers($rawHeaders);

    return array($body, $headers, $info);
}
/**
 * Follow one redirect
 *
 * Requests $url once and reports where it redirects to, if anywhere.
 *
 * @param string $url
 * @return string|null the Location header value (the first one when several
 *                     were sent) if the status code starts with "3" and a
 *                     Location header is present; null otherwise
 */
function followOneRedirect($url) {
    $response = httpGet($url);
    $headers = $response[1];
    $info = $response[2];

    $isRedirect = strpos($info['http_code'], '3') === 0;
    if (!$isRedirect || !isset($headers['Location'])) {
        return null;
    }

    $location = $headers['Location'];
    if (is_array($location)) {
        // Several Location headers were sent: use the first.
        return current($location);
    }

    return $location;
}
/**
 * rel-me Document URL
 *
 * Given a URL, resolves any redirects and returns the resolved URL, whether or
 * not the redirect chain is secure, and the redirect chain for inspection.
 *
 * The chain is considered insecure as soon as a URL in it (the starting URL
 * included) used https and a later hop changes the scheme. An upgrade from
 * http to https before any https URL has been seen is accepted. Following
 * stops at the first insecure hop, which is still recorded in the chain.
 *
 * Example usage:
 *
 *     list($profileUrl, $isSecure, $redirectChain) = IndieWeb\relMeDocumentUrl($me);
 *
 * $followOneRedirect defaults to IndieWeb\followOneRedirect but can be replaced
 * for testing purposes.
 *
 * Returns [string URL, bool isSecure, array redirectChain]
 *
 * @param string $url
 * @param callable|null $followOneRedirect takes a URL, returns the redirect target or null
 * @return array
 */
function relMeDocumentUrl($url, $followOneRedirect = null) {
    if (!is_callable($followOneRedirect)) {
        $followOneRedirect = __NAMESPACE__ . '\followOneRedirect';
    }

    $previous = array();
    $secure = true;
    $is_https = false;
    $currentUrl = $url;

    while (true) {
        // TODO: is resolving this URL correct behaviour here?
        // should it be resolved just to the host?
        $redirectedUrl = Mf2\resolveUrl($currentUrl, $followOneRedirect($currentUrl));

        // Stop when there is nowhere left to go or when the chain loops.
        if ($redirectedUrl === null || in_array($redirectedUrl, $previous, true)) {
            break;
        }

        // Record https BEFORE checking this hop, so that a downgrade on the
        // very first hop away from an https URL is detected.
        $currentScheme = parse_url($currentUrl, PHP_URL_SCHEME);
        if ($currentScheme == 'https') {
            $is_https = true;
        }

        if ($is_https && $currentScheme !== parse_url($redirectedUrl, PHP_URL_SCHEME)) {
            $secure = false;
            $previous[] = $currentUrl = $redirectedUrl;
            break;
        }

        $currentUrl = $redirectedUrl;
        $previous[] = $currentUrl;
    }

    return array($currentUrl, $secure, $previous);
}
/**
 * rel-me links
 *
 * Parses the given HTML with the Mf2 parser (using $url as the document URL)
 * and returns the de-duplicated rel="me" links it reports. An empty array is
 * returned when the page has none.
 *
 * @param string $html
 * @param string $url
 * @return array
 */
function relMeLinks($html, $url) {
    $mf2Parser = new Mf2\Parser($html, $url);
    $parsed = $mf2Parser->parse();

    $links = array();
    if (!empty($parsed['rels']['me'])) {
        $links = $parsed['rels']['me'];
    }

    return array_unique($links);
}
/**
 * URLs match other than scheme
 *
 * Given two URLs, checks if they are the same, ignoring their schemes. Both
 * URLs are rebuilt with unparseUrl() using a fixed scheme, so host case is
 * ignored and a missing path is treated as "/".
 *
 * @todo write tests for this
 * @param string $url1
 * @param string $url2
 * @return bool false when either URL cannot be parsed
 */
function urlsMatchOtherThanScheme($url1, $url2) {
    $p1 = parse_url($url1);
    $p2 = parse_url($url2);

    // parse_url() returns false for seriously malformed URLs. Assigning a key
    // on false would silently create an array, making every malformed URL
    // compare equal to every other one, so bail out instead.
    if (!is_array($p1) || !is_array($p2)) {
        return false;
    }

    $p1['scheme'] = 'http';
    $p2['scheme'] = 'http';

    return unparseUrl($p1) === unparseUrl($p2);
}
/**
 * Backlinking rel-me URL matches
 *
 * Used to check whether an inbound (silo to indie homepage) rel-me link can
 * securely be considered to link to the indie profile URL. Given a back-linking
 * URL and a profile URL, follows redirects from the back-linking URL and
 * returns an array of [(bool) matches, (bool) secure, (array) redirect chain].
 *
 * Any redirect that changes the scheme ends the walk and is reported as
 * insecure; in that case "matches" says whether the redirect target equals the
 * profile URL apart from its scheme.
 *
 * Example Usage:
 *
 *     list($matches, $secure, $previous) = IndieWeb\backlinkingRelMeUrlMatches($inboundRelMeUrl, $meUrl);
 *
 * $followOneRedirect defaults to IndieWeb\followOneRedirect but can be replaced
 * for testing purposes.
 *
 * @param string $backlinking
 * @param string $meUrl
 * @param callable|null $followOneRedirect takes a URL, returns the redirect target or null
 * @return array [matches, secure, previous]
 */
function backlinkingRelMeUrlMatches($backlinking, $meUrl, $followOneRedirect=null) {
    // Same fallback rule as relMeDocumentUrl().
    if (!is_callable($followOneRedirect)) {
        $followOneRedirect = __NAMESPACE__ . '\followOneRedirect';
    }

    $meUrl = normaliseUrl($meUrl);
    $previous = array();
    $currentUrl = normaliseUrl($backlinking);

    // A missing URL must never count as a match (null === null would).
    if ($meUrl === null || $currentUrl === null) {
        return array(false, true, $previous);
    }

    while (true) {
        if ($currentUrl === $meUrl) {
            return array(true, true, $previous); // the URLs match and are secure
        }

        $location = $followOneRedirect($currentUrl);

        // A Location without a scheme is a relative reference: resolve it
        // against the current URL so it is not mistaken for a scheme change.
        if ($location !== null && parse_url($location, PHP_URL_SCHEME) === null) {
            $location = Mf2\resolveUrl($currentUrl, $location);
        }

        $redirectedUrl = normaliseUrl($location);

        if ($redirectedUrl === null || in_array($redirectedUrl, $previous, true)) {
            return array(false, true, $previous); // the URLs don't match but are secure
        }

        if (parse_url($redirectedUrl, PHP_URL_SCHEME) !== parse_url($currentUrl, PHP_URL_SCHEME)) {
            // Scheme changed: insecure either way, but still report a match.
            $matches = urlsMatchOtherThanScheme($redirectedUrl, $meUrl);
            return array($matches, false, $previous);
        }

        $currentUrl = $redirectedUrl;
        $previous[] = $currentUrl;
    }
}