mirror of
https://github.com/friendica/friendica
synced 2025-04-27 06:30:12 +00:00
The "scrape" bugfix led to partly damaged encodings. This is fixed now.
This commit is contained in:
parent
3195bacd9e
commit
a86c143e24
5 changed files with 19 additions and 11 deletions
|
@@ -20,7 +20,12 @@ class HTML5_Parser
|
|||
|
||||
// Cleanup invalid HTML
|
||||
$doc = new DOMDocument();
|
||||
@$doc->loadHTML($text);
|
||||
|
||||
if (mb_detect_encoding($text, "UTF-8", true) == "UTF-8")
|
||||
@$doc->loadHTML('<?xml encoding="UTF-8" ?>'.$text);
|
||||
else
|
||||
@$doc->loadHTML($text);
|
||||
|
||||
$text = $doc->saveHTML();
|
||||
|
||||
$tokenizer = new HTML5_Tokenizer($text, $builder);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue