From 91c4ab23593f141e3d441ebe70dce8883244b27e Mon Sep 17 00:00:00 2001
From: Hypolite Petovan
Date: Fri, 23 Dec 2022 02:41:22 -0500
Subject: [PATCH] Add new xpathQuote and checkRelMeLink methods to Content\Text\HTML class

- Add tests for both methods
---
 src/Content/Text/HTML.php                     |  48 ++++++
 .../dom/relme/a-multiple-rel-value-end.html   |  10 ++
 .../relme/a-multiple-rel-value-middle.html    |  10 ++
 .../dom/relme/a-multiple-rel-value-start.html |  10 ++
 .../dom/relme/a-single-rel-value-fail.html    |  10 ++
 .../dom/relme/a-single-rel-value.html         |  10 ++
 .../dom/relme/link-single-rel-value-fail.html |  11 ++
 .../dom/relme/link-single-rel-value.html      |  11 ++
 tests/src/Content/Text/HTMLTest.php           | 150 ++++++++++++++++++
 9 files changed, 270 insertions(+)
 create mode 100644 tests/datasets/dom/relme/a-multiple-rel-value-end.html
 create mode 100644 tests/datasets/dom/relme/a-multiple-rel-value-middle.html
 create mode 100644 tests/datasets/dom/relme/a-multiple-rel-value-start.html
 create mode 100644 tests/datasets/dom/relme/a-single-rel-value-fail.html
 create mode 100644 tests/datasets/dom/relme/a-single-rel-value.html
 create mode 100644 tests/datasets/dom/relme/link-single-rel-value-fail.html
 create mode 100644 tests/datasets/dom/relme/link-single-rel-value.html

diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php
index fa5a0a5905..00d609cb60 100644
--- a/src/Content/Text/HTML.php
+++ b/src/Content/Text/HTML.php
@@ -33,6 +33,7 @@ use Friendica\Util\Network;
 use Friendica\Util\Strings;
 use Friendica\Util\XML;
 use League\HTMLToMarkdown\HtmlConverter;
+use Psr\Http\Message\UriInterface;
 
 class HTML
 {
@@ -1007,4 +1008,51 @@ class HTML
 
 		return $text;
 	}
+
+	/**
+	 * Quotes an arbitrary string so it can be embedded as a string value in an XPath expression
+	 *
+	 * @see https://stackoverflow.com/a/45228168
+	 * @param string $value Raw string, may contain single and/or double quotes
+	 * @return string Quoted literal, or a concat() expression when $value holds both quote types
+	 */
+	public static function xpathQuote(string $value): string
+	{
+		if (false === strpos($value, '"')) {
+			return '"' . $value . '"';
+		}
+
+		if (false === strpos($value, "'")) {
+			return "'" . $value . "'";
+		}
+
+		// if the value contains both single and double quotes, construct an
+		// expression that concatenates all non-double-quote substrings with
+		// the quotes, e.g.:
+		//
+		//    concat("'foo'", '"', "bar")
+		return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')';
+	}
+
+	/**
+	 * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute
+	 *
+	 * XHTML Friends Network http://gmpg.org/xfn/
+	 *
+	 * @param DOMDocument  $doc
+	 * @param UriInterface $meUrl
+	 * @return bool
+	 */
+	public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool
+	{
+		$xpath = new \DOMXPath($doc);
+
+		// This expression checks that "me" is among the space-delimited values of the "rel" attribute.
+		// And that the href attribute is exactly equal to the provided URL (explicitly cast to string)
+		$expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote((string) $meUrl) . "]";
+
+		$result = $xpath->query($expression);
+
+		return $result !== false && $result->length > 0;
+	}
 }
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-end.html b/tests/datasets/dom/relme/a-multiple-rel-value-end.html
new file mode 100644
index 0000000000..5fa4cbdf4a
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-end.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-middle.html b/tests/datasets/dom/relme/a-multiple-rel-value-middle.html
new file mode 100644
index 0000000000..08d33f051b
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-middle.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-multiple-rel-value-start.html b/tests/datasets/dom/relme/a-multiple-rel-value-start.html
new file mode 100644
index 0000000000..c71d8288ca
--- /dev/null
+++ b/tests/datasets/dom/relme/a-multiple-rel-value-start.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-single-rel-value-fail.html b/tests/datasets/dom/relme/a-single-rel-value-fail.html
new file mode 100644
index 0000000000..2735aa4aa7
--- /dev/null
+++ b/tests/datasets/dom/relme/a-single-rel-value-fail.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/a-single-rel-value.html b/tests/datasets/dom/relme/a-single-rel-value.html
new file mode 100644
index 0000000000..26d61204e7
--- /dev/null
+++ b/tests/datasets/dom/relme/a-single-rel-value.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Remote page
+
+
+ My Profile
+
+
diff --git a/tests/datasets/dom/relme/link-single-rel-value-fail.html b/tests/datasets/dom/relme/link-single-rel-value-fail.html
new file mode 100644
index 0000000000..2b7df5cb22
--- /dev/null
+++ b/tests/datasets/dom/relme/link-single-rel-value-fail.html
@@ -0,0 +1,11 @@
+
+
+
+
+ Remote page
+
+
+
+
+
+
diff --git a/tests/datasets/dom/relme/link-single-rel-value.html b/tests/datasets/dom/relme/link-single-rel-value.html
new file mode 100644
index 0000000000..f18d000f3d
--- /dev/null
+++ b/tests/datasets/dom/relme/link-single-rel-value.html
@@ -0,0 +1,11 @@
+
+
+
+
+ Remote page
+
+
+
+
+
+
diff --git a/tests/src/Content/Text/HTMLTest.php b/tests/src/Content/Text/HTMLTest.php
index bc352e5427..e4a8603595 100644
--- a/tests/src/Content/Text/HTMLTest.php
+++ b/tests/src/Content/Text/HTMLTest.php
@@ -25,6 +25,8 @@ use Exception;
 use Friendica\Content\Text\HTML;
 use Friendica\Network\HTTPException\InternalServerErrorException;
 use Friendica\Test\FixtureTest;
+use GuzzleHttp\Psr7\Uri;
+use Psr\Http\Message\UriInterface;
 
 class HTMLTest extends FixtureTest
 {
@@ -105,4 +107,152 @@ its surprisingly good",
 
 		self::assertEquals($expectedBBCode, $actual);
 	}
+
+	public function dataXpathQuote(): array
+	{
+		return [
+			'no quotes' => [
+				'value' => "foo",
+			],
+			'double quotes only' => [
+				'value' => "\"foo",
+			],
+			'single quotes only' => [
+				'value' => "'foo",
+			],
+			'both; double quotes in mid-string' => [
+				'value' => "'foo\"bar",
+			],
+			'multiple double quotes in mid-string' => [
+				'value' => "'foo\"bar\"baz",
+			],
+			'string ends with double quotes' => [
+				'value' => "'foo\"",
+			],
+			'string ends with run of double quotes' => [
+				'value' => "'foo\"\"",
+			],
+			'string begins with double quotes' => [
+				'value' => "\"'foo",
+			],
+			'string begins with run of double quotes' => [
+				'value' => "\"\"'foo",
+			],
+			'run of double quotes in mid-string' => [
+				'value' => "'foo\"\"bar",
+			],
+		];
+	}
+
+	/**
+	 * @dataProvider dataXpathQuote
+	 * @param string $value
+	 * @return void
+	 * @throws \DOMException
+	 */
+	public function testXpathQuote(string $value): void
+	{
+		$dom = new \DOMDocument();
+		$element = $dom->createElement('test');
+		$attribute = $dom->createAttribute('value');
+		$attribute->value = $value;
+		$element->appendChild($attribute);
+		$dom->appendChild($element);
+
+		$xpath = new \DOMXPath($dom);
+
+		$result = $xpath->query('//test[@value = ' . HTML::xpathQuote($value) . ']');
+
+		self::assertInstanceOf(\DOMNodeList::class, $result);
+		self::assertSame(1, $result->length);
+	}
+
+	public function dataCheckRelMeLink(): array
+	{
+		$aSingleRelValue = new \DOMDocument();
+		$aSingleRelValue->load(__DIR__ . '/../../../datasets/dom/relme/a-single-rel-value.html');
+
+		$aMultipleRelValueStart = new \DOMDocument();
+		$aMultipleRelValueStart->load(__DIR__ . '/../../../datasets/dom/relme/a-multiple-rel-value-start.html');
+
+		$aMultipleRelValueMiddle = new \DOMDocument();
+		$aMultipleRelValueMiddle->load(__DIR__ . '/../../../datasets/dom/relme/a-multiple-rel-value-middle.html');
+
+		$aMultipleRelValueEnd = new \DOMDocument();
+		$aMultipleRelValueEnd->load(__DIR__ . '/../../../datasets/dom/relme/a-multiple-rel-value-end.html');
+
+		$linkSingleRelValue = new \DOMDocument();
+		$linkSingleRelValue->load(__DIR__ . '/../../../datasets/dom/relme/link-single-rel-value.html');
+
+		$meUrl = new Uri('https://example.com/profile/me');
+
+		return [
+			'a-single-rel-value' => [
+				'doc'   => $aSingleRelValue,
+				'meUrl' => $meUrl
+			],
+			'a-multiple-rel-value-start' => [
+				'doc'   => $aMultipleRelValueStart,
+				'meUrl' => $meUrl
+			],
+			'a-multiple-rel-value-middle' => [
+				'doc'   => $aMultipleRelValueMiddle,
+				'meUrl' => $meUrl
+			],
+			'a-multiple-rel-value-end' => [
+				'doc'   => $aMultipleRelValueEnd,
+				'meUrl' => $meUrl
+			],
+			'link-single-rel-value' => [
+				'doc'   => $linkSingleRelValue,
+				'meUrl' => $meUrl
+			],
+		];
+	}
+
+
+	/**
+	 * @dataProvider dataCheckRelMeLink
+	 * @param \DOMDocument $doc
+	 * @param UriInterface $meUrl
+	 * @return void
+	 */
+	public function testCheckRelMeLink(\DOMDocument $doc, UriInterface $meUrl): void
+	{
+		self::assertTrue(HTML::checkRelMeLink($doc, $meUrl));
+	}
+
+	public function dataCheckRelMeLinkFail(): array
+	{
+		$aSingleRelValueFail = new \DOMDocument();
+		$aSingleRelValueFail->load(__DIR__ . '/../../../datasets/dom/relme/a-single-rel-value-fail.html');
+
+		$linkSingleRelValueFail = new \DOMDocument();
+		$linkSingleRelValueFail->load(__DIR__ . '/../../../datasets/dom/relme/link-single-rel-value-fail.html');
+
+		$meUrl = new Uri('https://example.com/profile/me');
+
+		return [
+			'a-single-rel-value-fail' => [
+				'doc'   => $aSingleRelValueFail,
+				'meUrl' => $meUrl
+			],
+			'link-single-rel-value-fail' => [
+				'doc'   => $linkSingleRelValueFail,
+				'meUrl' => $meUrl
+			],
+		];
+	}
+
+
+	/**
+	 * @dataProvider dataCheckRelMeLinkFail
+	 * @param \DOMDocument $doc
+	 * @param UriInterface $meUrl
+	 * @return void
+	 */
+	public function testCheckRelMeLinkFail(\DOMDocument $doc, UriInterface $meUrl): void
+	{
+		self::assertFalse(HTML::checkRelMeLink($doc, $meUrl));
+	}
 }