diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php
index fa5a0a5905..00d609cb60 100644
--- a/src/Content/Text/HTML.php
+++ b/src/Content/Text/HTML.php
@@ -33,6 +33,7 @@ use Friendica\Util\Network;
use Friendica\Util\Strings;
use Friendica\Util\XML;
use League\HTMLToMarkdown\HtmlConverter;
+use Psr\Http\Message\UriInterface;
class HTML
{
@@ -1007,4 +1008,51 @@ class HTML
return $text;
}
+
+	/**
+	 * Turns an arbitrary string into an XPath string expression
+	 *
+	 * A value without double quotes is wrapped in double quotes, a value without
+	 * single quotes is wrapped in single quotes. A value containing both cannot be
+	 * written as one literal and is returned as a concat() call instead.
+	 *
+	 * @see https://stackoverflow.com/a/45228168
+	 * @param string $value Raw string, may contain single and/or double quotes
+	 * @return string Quoted literal or concat() expression, ready to be embedded in an XPath query
+	 */
+	public static function xpathQuote(string $value): string
+	{
+		if (false === strpos($value, '"')) {
+			return '"' . $value . '"';
+		}
+
+		if (false === strpos($value, "'")) {
+			return "'" . $value . "'";
+		}
+
+		// if the value contains both single and double quotes, construct an
+		// expression that concatenates all non-double-quote substrings with
+		// the quotes, e.g.:
+		//
+		// concat("'foo'", '"', "bar")
+		//
+		// The substrings contain no double quotes anymore, so the recursive call
+		// always ends in the first branch above. The callable uses self::class
+		// because "self" inside array callables is deprecated since PHP 8.2.
+		return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')';
+	}
+
+	/**
+	 * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute
+	 *
+	 * XHTML Friends Network http://gmpg.org/xfn/
+	 *
+	 * @param \DOMDocument $doc   Parsed document, elements of any tag name are considered
+	 * @param UriInterface $meUrl URL the href attribute has to equal character for character
+	 * @return bool True if at least one element matches, false if none does or the query fails
+	 */
+	public static function checkRelMeLink(\DOMDocument $doc, UriInterface $meUrl): bool
+	{
+		$xpath = new \DOMXPath($doc);
+
+		// This expression checks that "me" is among the space-delimited values of the "rel" attribute
+		// and that the href attribute equals exactly the provided URL.
+		// The URL is cast explicitly: xpathQuote() takes a string and an implicit conversion of the
+		// UriInterface object would raise a TypeError as soon as strict_types is enabled.
+		$expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote((string)$meUrl) . "]";
+
+		$result = $xpath->query($expression);
+
+		// query() returns false for an expression it cannot evaluate
+		return $result !== false && $result->length > 0;
+	}
}
diff --git a/src/Worker/CheckRelMeProfileLink.php b/src/Worker/CheckRelMeProfileLink.php
index 987619e19a..ebd0254889 100644
--- a/src/Worker/CheckRelMeProfileLink.php
+++ b/src/Worker/CheckRelMeProfileLink.php
@@ -22,14 +22,14 @@
namespace Friendica\Worker;
use DOMDocument;
-use Friendica\DI;
+use Friendica\Content\Text\HTML;
use Friendica\Core\Logger;
+use Friendica\DI;
use Friendica\Model\Profile;
use Friendica\Model\User;
use Friendica\Network\HTTPClient\Client\HttpClientAccept;
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
-use Friendica\Util\Network;
-use Friendica\Util\Strings;
+use GuzzleHttp\Psr7\Uri;
/* This class is used to verify the homepage link of a user profile.
* To do so, we look for rel="me" links in the given homepage, if one
@@ -56,43 +56,37 @@ class CheckRelMeProfileLink
{
Logger::notice('Verifying the homepage', ['uid' => $uid]);
Profile::update(['homepage_verified' => false], $uid);
- $homepageUrlVerified = false;
- $owner = User::getOwnerDataById($uid);
- if (!empty($owner['homepage'])) {
- $xrd_timeout = DI::config()->get('system', 'xrd_timeout');
- $curlResult = DI::httpClient()->get($owner['homepage'], $accept_content = HttpClientAccept::HTML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
- if ($curlResult->isSuccess()) {
- $content = $curlResult->getBody();
- if (!$content) {
- Logger::notice('Empty body of the fetched homepage link). Cannot verify the relation to profile of UID %s.', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
- } else {
- $doc = new DOMDocument();
- @$doc->loadHTML($content);
- if (!$doc) {
- Logger::notice('Could not parse the content');
- } else {
- foreach ($doc->getElementsByTagName('a') as $link) {
- $rel = $link->getAttribute('rel');
- if ($rel == 'me') {
- $href = $link->getAttribute('href');
- if (!$homepageUrlVerified && Network::isValidHttpUrl($href)) {
- $homepageUrlVerified = Strings::compareLink($owner['url'], $href);
- }
- }
- }
- }
- if ($homepageUrlVerified) {
- Profile::update(['homepage_verified' => true], $uid);
- Logger::notice('Homepage URL verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
- } else {
- Logger::notice('Homepage URL could not be verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
- }
- }
- } else {
- Logger::notice('Could not cURL the homepage URL', ['owner homepage' => $owner['homepage']]);
- }
- } else {
+
+		$owner = User::getOwnerDataById($uid);
+		if (empty($owner['homepage'])) {
 			Logger::notice('The user has no homepage link.', ['uid' => $uid]);
+			return;
+		}
+
+		// Request the homepage as HTML, using the configured "xrd_timeout" as request timeout
+		$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
+		$curlResult = DI::httpClient()->get($owner['homepage'], HttpClientAccept::HTML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
+		if (!$curlResult->isSuccess()) {
+			Logger::notice('Could not cURL the homepage URL', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
+			return;
+		}
+
+		$content = $curlResult->getBody();
+		if (!$content) {
+			// The user id is part of the log context, the message itself carries no placeholder
+			Logger::notice('Empty body of the fetched homepage link. Cannot verify the relation to the profile.', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
+			return;
+		}
+
+		// NOTE(review): the "@" presumably silences parser warnings on sloppy real-world markup - confirm
+		$doc = new DOMDocument();
+		if (!@$doc->loadHTML($content)) {
+			Logger::notice('Could not parse the content', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
+			return;
+		}
+
+		// The homepage counts as verified if it holds a rel="me" element whose href equals $owner['url']
+		if (HTML::checkRelMeLink($doc, new Uri($owner['url']))) {
+			Profile::update(['homepage_verified' => true], $uid);
+			Logger::notice('Homepage URL verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
+		} else {
+			Logger::notice('Homepage URL could not be verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
 		}
}
}
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-end.html b/tests/datasets/dom/relme/a-multiple-rel-value-end.html
new file mode 100644
index 0000000000..5fa4cbdf4a
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-end.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-middle.html b/tests/datasets/dom/relme/a-multiple-rel-value-middle.html
new file mode 100644
index 0000000000..08d33f051b
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-middle.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-start.html b/tests/datasets/dom/relme/a-multiple-rel-value-start.html
new file mode 100644
index 0000000000..c71d8288ca
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-start.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-single-rel-value-fail.html b/tests/datasets/dom/relme/a-single-rel-value-fail.html
new file mode 100644
index 0000000000..2735aa4aa7
--- /dev/null
+++ b/tests/datasets/dom/relme/a-single-rel-value-fail.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-single-rel-value.html b/tests/datasets/dom/relme/a-single-rel-value.html
new file mode 100644
index 0000000000..26d61204e7
--- /dev/null
+++ b/tests/datasets/dom/relme/a-single-rel-value.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/link-single-rel-value-fail.html b/tests/datasets/dom/relme/link-single-rel-value-fail.html
new file mode 100644
index 0000000000..2b7df5cb22
--- /dev/null
+++ b/tests/datasets/dom/relme/link-single-rel-value-fail.html
@@ -0,0 +1,11 @@
+
+
+
+
+ Remote page
+
+
+
+
+
+
diff --git a/tests/datasets/dom/relme/link-single-rel-value.html b/tests/datasets/dom/relme/link-single-rel-value.html
new file mode 100644
index 0000000000..f18d000f3d
--- /dev/null
+++ b/tests/datasets/dom/relme/link-single-rel-value.html
@@ -0,0 +1,11 @@
+
+
+
+
+ Remote page
+
+
+
+
+
+
diff --git a/tests/src/Content/Text/HTMLTest.php b/tests/src/Content/Text/HTMLTest.php
index bc352e5427..e4a8603595 100644
--- a/tests/src/Content/Text/HTMLTest.php
+++ b/tests/src/Content/Text/HTMLTest.php
@@ -25,6 +25,8 @@ use Exception;
use Friendica\Content\Text\HTML;
use Friendica\Network\HTTPException\InternalServerErrorException;
use Friendica\Test\FixtureTest;
+use GuzzleHttp\Psr7\Uri;
+use Psr\Http\Message\UriInterface;
class HTMLTest extends FixtureTest
{
@@ -105,4 +107,152 @@ its surprisingly good",
self::assertEquals($expectedBBCode, $actual);
}
+
+	/**
+	 * Provides attribute values covering the quote combinations handled by HTML::xpathQuote()
+	 *
+	 * @return array[] Data sets holding a single "value" entry each
+	 */
+	public function dataXpathQuote(): array
+	{
+		return [
+			// At most one kind of quote
+			'no quotes' => ['value' => "foo"],
+			'double quotes only' => ['value' => "\"foo"],
+			'single quotes only' => ['value' => "'foo"],
+
+			// Both kinds of quotes, with the double quotes at varying positions
+			'both; double quotes in mid-string' => ['value' => "'foo\"bar"],
+			'multiple double quotes in mid-string' => ['value' => "'foo\"bar\"baz"],
+			'string ends with double quotes' => ['value' => "'foo\""],
+			'string ends with run of double quotes' => ['value' => "'foo\"\""],
+			'string begins with double quotes' => ['value' => "\"'foo"],
+			'string begins with run of double quotes' => ['value' => "\"\"'foo"],
+			'run of double quotes in mid-string' => ['value' => "'foo\"\"bar"],
+		];
+	}
+
+	/**
+	 * Ensures that a quoted value selects the attribute holding the very same raw value
+	 *
+	 * @dataProvider dataXpathQuote
+	 * @param string $value Attribute value containing a given mix of quotes
+	 * @return void
+	 * @throws \DOMException
+	 */
+	public function testXpathQuote(string $value)
+	{
+		// Single <test> element with the raw value as its "value" attribute
+		$document = new \DOMDocument();
+		$node = $document->createElement('test');
+		$attr = $document->createAttribute('value');
+		$attr->value = $value;
+		$node->appendChild($attr);
+		$document->appendChild($node);
+
+		$expression = '//test[@value = ' . HTML::xpathQuote($value) . ']';
+		$matches = (new \DOMXPath($document))->query($expression);
+
+		self::assertInstanceOf(\DOMNodeList::class, $matches);
+		self::assertEquals(1, $matches->length);
+	}
+
+	/**
+	 * Provides the fixture documents HTML::checkRelMeLink() is expected to accept
+	 *
+	 * @return array[] Data sets with a "doc" and a "meUrl" entry each
+	 */
+	public function dataCheckRelMeLink(): array
+	{
+		// Every data set is named after its fixture file (without extension)
+		$fixtures = [
+			'a-single-rel-value',
+			'a-multiple-rel-value-start',
+			'a-multiple-rel-value-middle',
+			'a-multiple-rel-value-end',
+			'link-single-rel-value',
+		];
+
+		$documents = [];
+		foreach ($fixtures as $fixture) {
+			$documents[$fixture] = new \DOMDocument();
+			$documents[$fixture]->load(__DIR__ . '/../../../datasets/dom/relme/' . $fixture . '.html');
+		}
+
+		$meUrl = new Uri('https://example.com/profile/me');
+
+		$dataSets = [];
+		foreach ($documents as $name => $document) {
+			$dataSets[$name] = [
+				'doc' => $document,
+				'meUrl' => $meUrl
+			];
+		}
+
+		return $dataSets;
+	}
+
+
+	/**
+	 * Expects HTML::checkRelMeLink() to report a match for every provided document
+	 *
+	 * @dataProvider dataCheckRelMeLink
+	 * @param \DOMDocument $doc   Document to search
+	 * @param UriInterface $meUrl URL to look for
+	 * @return void
+	 */
+	public function testCheckRelMeLink(\DOMDocument $doc, UriInterface $meUrl)
+	{
+		$found = HTML::checkRelMeLink($doc, $meUrl);
+
+		self::assertTrue($found);
+	}
+
+	/**
+	 * Provides the fixture documents HTML::checkRelMeLink() is expected to reject
+	 *
+	 * @return array[] Data sets with a "doc" and a "meUrl" entry each
+	 */
+	public function dataCheckRelMeLinkFail(): array
+	{
+		// Every data set is named after its fixture file (without extension)
+		$fixtures = [
+			'a-single-rel-value-fail',
+			'link-single-rel-value-fail',
+		];
+
+		$documents = [];
+		foreach ($fixtures as $fixture) {
+			$documents[$fixture] = new \DOMDocument();
+			$documents[$fixture]->load(__DIR__ . '/../../../datasets/dom/relme/' . $fixture . '.html');
+		}
+
+		$meUrl = new Uri('https://example.com/profile/me');
+
+		$dataSets = [];
+		foreach ($documents as $name => $document) {
+			$dataSets[$name] = [
+				'doc' => $document,
+				'meUrl' => $meUrl
+			];
+		}
+
+		return $dataSets;
+	}
+
+
+	/**
+	 * Expects HTML::checkRelMeLink() to report no match for every provided document
+	 *
+	 * @dataProvider dataCheckRelMeLinkFail
+	 * @param \DOMDocument $doc   Document to search
+	 * @param UriInterface $meUrl URL to look for
+	 * @return void
+	 */
+	public function testCheckRelMeLinkFail(\DOMDocument $doc, UriInterface $meUrl)
+	{
+		$found = HTML::checkRelMeLink($doc, $meUrl);
+
+		self::assertFalse($found);
+	}
}