mirror of
https://github.com/friendica/friendica
synced 2025-04-19 11:10:10 +00:00
Add implementation of HTTP Media Type
- Add charset extraction from DOMDocument - TESTS!
This commit is contained in:
parent
f4b5d22396
commit
5e2b655b43
4 changed files with 1671 additions and 0 deletions
|
@ -23,6 +23,7 @@ namespace Friendica\Content\Text;
|
|||
|
||||
use DOMDocument;
|
||||
use DOMXPath;
|
||||
use Friendica\Protocol\HTTP\MediaType;
|
||||
use Friendica\Content\Widget\ContactBlock;
|
||||
use Friendica\Core\Hook;
|
||||
use Friendica\Core\Renderer;
|
||||
|
@ -1055,4 +1056,30 @@ class HTML
|
|||
|
||||
return $result !== false && $result->length > 0;
|
||||
}
|
||||
|
||||
/**
 * Extracts the character set declared by the <meta> elements of an HTML document.
 *
 * Lookup order:
 *  1. the first <meta charset="..."> element, in document order;
 *  2. the first <meta http-equiv="content-type" content="..."> element (attribute value
 *     matched case-insensitively) whose content mentions "charset"; the content is parsed
 *     as an HTTP media type and its "charset" parameter is used.
 *
 * The value is stripped of leading/trailing ASCII whitespace and lowercased, so that
 * " UTF-8 " and "utf-8" give the same result. An empty declaration is treated as absent.
 *
 * @param DOMDocument $doc
 * @return string|null Lowercase charset, or null if the document does not declare a usable one
 */
public static function extractCharset(DOMDocument $doc): ?string
{
	// ASCII whitespace as defined by the WHATWG Infra standard (tab, LF, FF, CR, space)
	$whitespace = " \t\n\r\f";

	$xpath = new DOMXPath($doc);

	// string() evaluates to an empty string when no node matches the expression
	$charset = $xpath->evaluate("string(//meta[@charset]/@charset)");
	if (is_string($charset)) {
		$charset = trim($charset, $whitespace);
		if ($charset !== '') {
			return strtolower($charset);
		}
	}

	try {
		// This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case
		// whose content attribute contains a "charset" string and returns its value
		$expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)";
		$mediaType = MediaType::fromContentType($xpath->evaluate($expression));
		if (isset($mediaType->parameters['charset'])) {
			$charset = trim((string)$mediaType->parameters['charset'], $whitespace);
			if ($charset !== '') {
				return strtolower($charset);
			}
		}
	} catch (\InvalidArgumentException $e) {
		// Deliberately ignored: presumably raised when the content attribute is missing or is not
		// a parseable media type (TODO confirm against MediaType::fromContentType), in which case
		// the document simply has no usable declaration and we fall through to null.
	}

	return null;
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue