Add implementation of HTTP Media Type

- Add charset extraction from DOMDocument
- TESTS!
This commit is contained in:
Hypolite Petovan 2023-01-10 01:07:14 -05:00
parent f4b5d22396
commit 5e2b655b43
4 changed files with 1671 additions and 0 deletions

View file

@ -23,6 +23,7 @@ namespace Friendica\Content\Text;
use DOMDocument;
use DOMXPath;
use Friendica\Protocol\HTTP\MediaType;
use Friendica\Content\Widget\ContactBlock;
use Friendica\Core\Hook;
use Friendica\Core\Renderer;
@ -1055,4 +1056,30 @@ class HTML
return $result !== false && $result->length > 0;
}
/**
 * Extracts the character set declared in the meta tags of an HTML document.
 *
 * The `<meta charset="...">` form is checked first. If it is absent or blank, the
 * `<meta http-equiv="Content-Type" content="...; charset=...">` form is used instead.
 *
 * @param DOMDocument $doc
 * @return string|null Lowercase charset without surrounding whitespace, null if none is declared
 */
public static function extractCharset(DOMDocument $doc): ?string
{
    // ASCII whitespace (space, tab, LF, FF, CR) is not part of an encoding label,
    // so it is stripped from both ends of whatever the document declares.
    $whitespace = " \t\n\f\r";

    $xpath = new DOMXPath($doc);

    // string() yields an empty string when no meta tag carries a charset attribute
    $charset = trim((string)$xpath->evaluate("string(//meta[@charset]/@charset)"), $whitespace);
    if ($charset !== '') {
        return strtolower($charset);
    }

    // This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case
    // whose content attribute contains a "charset" string and returns its value
    $expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)";
    $contentType = (string)$xpath->evaluate($expression);
    if ($contentType === '') {
        // No matching meta tag, nothing to parse
        return null;
    }

    try {
        $mediaType = MediaType::fromContentType($contentType);
        if (isset($mediaType->parameters['charset'])) {
            $charset = trim((string)$mediaType->parameters['charset'], $whitespace);
            if ($charset !== '') {
                return strtolower($charset);
            }
        }
    } catch (\InvalidArgumentException $e) {
        // Deliberate best effort: a content value rejected by the media type parser
        // is treated the same as a missing charset declaration.
    }

    return null;
}
}