mirror of
https://github.com/vector-im/element-ios.git
synced 2024-09-28 23:32:41 +00:00
Fix HTML render for links containing Markdown formatting (#5587)
* Fix HTML render for links containing Markdown formatting * Log renderToHTML fail as an error * Fix AST nodes memory handling Co-authored-by: Arnaud Ringenbach <arnaud.ringenbach@niji.fr>
This commit is contained in:
parent
1b3ff901f6
commit
057cb9ed70
3 changed files with 295 additions and 1 deletions
|
@ -36,7 +36,17 @@ public class MarkdownToHTMLRenderer: NSObject {
|
|||
extension MarkdownToHTMLRenderer: MarkdownToHTMLRendererProtocol {

    /// Renders the given Markdown string to HTML.
    ///
    /// The string is first parsed into an AST, `repairLinks()` is run on that AST,
    /// and the result is rendered to HTML using the renderer's `options`.
    ///
    /// - Parameter markdown: The Markdown source to render.
    /// - Returns: The rendered HTML, or `nil` if parsing or rendering throws.
    public func renderToHTML(markdown: String) -> String? {
        do {
            let ast = try DownASTRenderer.stringToAST(markdown, options: options)
            // The AST is owned by this function: release it on every exit path,
            // including when `astToHTML` throws.
            defer {
                cmark_node_free(ast)
            }
            ast.repairLinks()
            return try DownHTMLRenderer.astToHTML(ast, options: options)
        } catch {
            // Log the underlying error rather than the input string: the input is
            // user-authored message content and should not end up in logs.
            MXLog.error("[MarkdownToHTMLRenderer] renderToHTML failed with error: \(error)")
            return nil
        }
    }

}
|
||||
|
@ -50,3 +60,116 @@ public class MarkdownToHTMLRendererHardBreaks: MarkdownToHTMLRenderer {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
// MARK: - AST-handling private extensions
|
||||
private extension CMarkNode {
    /// Formatting symbol associated with the given node type.
    ///
    /// Note: this is only defined for node types that are handled in `repairLinks`;
    /// any other node type yields an empty string.
    var formattingSymbol: String {
        switch self.type {
        case CMARK_NODE_EMPH:
            return "_"
        case CMARK_NODE_STRONG:
            return "__"
        default:
            return ""
        }
    }

    /// Repairs links that were broken down by markdown formatting.
    ///
    /// Should be used on the first node of libcmark's AST
    /// (e.g. the object returned by `DownASTRenderer.stringToAST`).
    ///
    /// While walking the tree, the text accumulated since the last whitespace or
    /// line break inside the current paragraph is tracked. When an emphasis/strong
    /// node directly follows such text and that text contains a URL, the formatted
    /// node is replaced by a plain text node holding its literal wrapped in the
    /// original formatting symbols.
    func repairLinks() {
        guard let iterator = cmark_iter_new(self) else { return }

        var orphanNodes: [CMarkNode] = []
        defer {
            // The iterator is not owned by the AST and must be released explicitly.
            cmark_iter_free(iterator)
            // Free all nodes removed from the AST.
            // This is done as a last step (and on every exit path) to avoid
            // messing up with the main iterator.
            for orphanNode in orphanNodes {
                cmark_node_free(orphanNode)
            }
        }

        var text = ""
        var isInParagraph = false
        var previousNode: CMarkNode?
        var shouldUnlinkFormattingMode = false
        var event: cmark_event_type?

        while event != CMARK_EVENT_DONE {
            event = cmark_iter_next(iterator)

            guard let node = cmark_iter_get_node(iterator) else { return }

            if node.type == CMARK_NODE_PARAGRAPH {
                if event == CMARK_EVENT_ENTER {
                    isInParagraph = true
                } else {
                    isInParagraph = false
                    text = ""
                }
            }

            if isInParagraph {
                switch node.type {
                case CMARK_NODE_SOFTBREAK,
                     CMARK_NODE_LINEBREAK:
                    text = ""
                case CMARK_NODE_TEXT:
                    if let literal = node.literal {
                        text += literal
                        // Reset text if it ends up with a whitespace.
                        if text.last?.isWhitespace == true {
                            text = ""
                        }
                        // Only the last part could be a link conflicting with next node.
                        text = String(text.split(separator: " ").last ?? "")
                    }
                case CMARK_NODE_EMPH where previousNode?.type == CMARK_NODE_TEXT,
                     CMARK_NODE_STRONG where previousNode?.type == CMARK_NODE_TEXT:
                    if event == CMARK_EVENT_ENTER {
                        if !text.containedUrls.isEmpty,
                           let firstChild = node.pointee.first_child,
                           let childLiteral = firstChild.literal {
                            // If current text is a link, the formatted text is reverted back to a
                            // plain text as a part of the link.
                            let symbol = node.formattingSymbol
                            let nonFormattedText = "\(symbol)\(childLiteral)\(symbol)"
                            let replacementTextNode = cmark_node_new(CMARK_NODE_TEXT)
                            cmark_node_set_literal(replacementTextNode, nonFormattedText)
                            cmark_node_insert_after(previousNode, replacementTextNode)

                            // Set child literal to empty string so we don't read it.
                            // This avoids having to re-create the main
                            // iterator in the middle of the process.
                            cmark_node_set_literal(firstChild, "")
                            let newIterator = cmark_iter_new(node)
                            _ = cmark_iter_next(newIterator)
                            cmark_node_unlink(node)
                            orphanNodes.append(node)
                            let nextNode = cmark_iter_get_node(newIterator)
                            // The temporary iterator is only needed to fetch `nextNode`.
                            cmark_iter_free(newIterator)
                            cmark_node_insert_after(previousNode, nextNode)
                            shouldUnlinkFormattingMode = true
                        }
                    } else {
                        if shouldUnlinkFormattingMode {
                            cmark_node_unlink(node)
                            // A node recorded on ENTER may be seen again on EXIT;
                            // record it only once so that it is never freed twice.
                            if !orphanNodes.contains(node) {
                                orphanNodes.append(node)
                            }
                            shouldUnlinkFormattingMode = false
                        }
                    }
                default:
                    break
                }
            }

            previousNode = node
        }
    }
}
|
||||
|
||||
private extension String {
    /// Link matches found in the receiver by an `NSDataDetector` configured for links.
    ///
    /// Yields an empty array when the detector cannot be created.
    var containedUrls: [NSTextCheckingResult] {
        let linkCheckingTypes = NSTextCheckingResult.CheckingType.link.rawValue
        guard let linkDetector = try? NSDataDetector(types: linkCheckingTypes) else {
            return []
        }

        // NSRange is expressed in UTF-16 code units, hence `utf16.count`.
        let fullRange = NSRange(location: 0, length: self.utf16.count)
        return linkDetector.matches(in: self, options: [], range: fullRange)
    }
}
|
||||
|
|
170
RiotTests/MarkdownToHTMLRendererTests.swift
Normal file
170
RiotTests/MarkdownToHTMLRendererTests.swift
Normal file
|
@ -0,0 +1,170 @@
|
|||
//
|
||||
// Copyright 2022 New Vector Ltd
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
import XCTest
|
||||
|
||||
@testable import Riot
|
||||
|
||||
final class MarkdownToHTMLRendererTests: XCTestCase {
    // MARK: - Tests

    /// Test autolinks HTML render.
    ///
    /// `<#...>` is not a valid autolink, so it is expected to be rendered as
    /// HTML-escaped text (`&lt;` / `&gt;`), while `<http...>` becomes an anchor.
    func testRenderAutolinks() {
        let input = [
            "Test1:",
            "<#_foonetic_xkcd:matrix.org>",
            "<http://google.com/_thing_>",
            "<https://matrix.org/_matrix/client/foo/123_>",
            "<#_foonetic_xkcd:matrix.org>",
            "",
            "Test1A:",
            "<#_foonetic_xkcd:matrix.org>",
            "<http://google.com/_thing_>",
            "<https://matrix.org/_matrix/client/foo/123_>",
            "<#_foonetic_xkcd:matrix.org>",
            "",
            "Test2:",
            "<http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg>",
            "<http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg>",
            "",
            "Test3:",
            "<https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org>",
            "<https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org>",
        ].joined(separator: "\n")
        let expectedOutput = [
            "<p>Test1:\n&lt;#_foonetic_xkcd:matrix.org&gt;\n<a href=\"http://google.com/_thing_\">http://google.com/_thing_</a>\n<a href=\"https://matrix.org/_matrix/client/foo/123_\">https://matrix.org/_matrix/client/foo/123_</a>\n&lt;#_foonetic_xkcd:matrix.org&gt;</p>",
            "<p>Test1A:\n&lt;#_foonetic_xkcd:matrix.org&gt;\n<a href=\"http://google.com/_thing_\">http://google.com/_thing_</a>\n<a href=\"https://matrix.org/_matrix/client/foo/123_\">https://matrix.org/_matrix/client/foo/123_</a>\n&lt;#_foonetic_xkcd:matrix.org&gt;</p>",
            "<p>Test2:\n<a href=\"http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg\">http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg</a>\n<a href=\"http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg\">http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg</a></p>",
            "<p>Test3:\n<a href=\"https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org\">https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org</a>\n<a href=\"https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org\">https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org</a></p>",
            "",
        ].joined(separator: "\n")
        testRenderHTML(input: input, expectedOutput: expectedOutput)
    }

    /// Test links with markdown formatting conflict.
    func testRenderRepairedLinks() {
        let input = [
            "Test1:",
            "#_foonetic_xkcd:matrix.org",
            "http://google.com/_thing_",
            "https://matrix.org/_matrix/client/foo/123_",
            "#_foonetic_xkcd:matrix.org",
            "",
            "Test1A:",
            "#_foonetic_xkcd:matrix.org",
            "http://google.com/_thing_",
            "https://matrix.org/_matrix/client/foo/123_",
            "#_foonetic_xkcd:matrix.org",
            "",
            "Test2:",
            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
            "",
            "Test3:",
            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
        ].joined(separator: "\n")
        let expectedOutput = [
            "<p>Test1:\n#_foonetic_xkcd:matrix.org\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n#_foonetic_xkcd:matrix.org</p>",
            "<p>Test1A:\n#_foonetic_xkcd:matrix.org\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n#_foonetic_xkcd:matrix.org</p>",
            "<p>Test2:\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg</p>",
            "<p>Test3:\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org</p>",
            "",
        ].joined(separator: "\n")
        testRenderHTML(input: input, expectedOutput: expectedOutput)
    }

    /// Test links with markdown strong formatting conflict.
    func testRenderRepairedLinksWithStrongFormatting() {
        let input = "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py"
            + " "
            + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py"
        let expectedOutput = "<p>https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py"
            + " "
            + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py</p>"
            + "\n"
        testRenderHTML(input: input, expectedOutput: expectedOutput)
    }

    /// Test links with markdown formatting conflict and actual markdown in between.
    func testRenderRepairedLinksWithMarkdownInBetween() {
        let input = "__Some bold text__ "
            + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py"
            + " _some emphased text_ "
            + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg"
        let expectedOutput = "<p><strong>Some bold text</strong> "
            + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py"
            + " <em>some emphased text</em> "
            + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg</p>"
            + "\n"
        testRenderHTML(input: input, expectedOutput: expectedOutput)
    }

    /// Test links inside codeblocks.
    ///
    /// Note: the opening fence is immediately followed by `Test1:` (which the
    /// expected output shows as the code block's language class) and the closing
    /// fence is glued to the last line, so it stays part of the code content.
    func testRenderLinksInCodeblock() {
        let input = "```"
            + [
                "Test1:",
                "#_foonetic_xkcd:matrix.org",
                "http://google.com/_thing_",
                "https://matrix.org/_matrix/client/foo/123_",
                "#_foonetic_xkcd:matrix.org",
                "",
                "Test1A:",
                "#_foonetic_xkcd:matrix.org",
                "http://google.com/_thing_",
                "https://matrix.org/_matrix/client/foo/123_",
                "#_foonetic_xkcd:matrix.org",
                "",
                "Test2:",
                "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
                "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
                "",
                "Test3:",
                "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
                "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
            ].joined(separator: "\n")
            + "```"
        let expectedOutput = [
            "<pre><code class=\"language-Test1:\">#_foonetic_xkcd:matrix.org",
            "http://google.com/_thing_",
            "https://matrix.org/_matrix/client/foo/123_",
            "#_foonetic_xkcd:matrix.org",
            "",
            "Test1A:",
            "#_foonetic_xkcd:matrix.org",
            "http://google.com/_thing_",
            "https://matrix.org/_matrix/client/foo/123_",
            "#_foonetic_xkcd:matrix.org",
            "",
            "Test2:",
            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
            "",
            "Test3:",
            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org```",
            "</code></pre>",
            "",
        ].joined(separator: "\n")
        testRenderHTML(input: input, expectedOutput: expectedOutput)
    }

    // MARK: - Private

    /// Renders `input` with a fresh `MarkdownToHTMLRenderer` and asserts that the
    /// result equals `expectedOutput`.
    ///
    /// - Parameters:
    ///   - input: Markdown source to render.
    ///   - expectedOutput: Exact HTML expected from the renderer.
    ///   - file: Call-site file, forwarded so failures are reported in the calling test.
    ///   - line: Call-site line, forwarded so failures are reported in the calling test.
    private func testRenderHTML(input: String,
                                expectedOutput: String,
                                file: StaticString = #filePath,
                                line: UInt = #line) {
        let output = MarkdownToHTMLRenderer().renderToHTML(markdown: input)
        XCTAssertEqual(output, expectedOutput, file: file, line: line)
    }
}
|
1
changelog.d/5355.bugfix
Normal file
1
changelog.d/5355.bugfix
Normal file
|
@ -0,0 +1 @@
|
|||
Markdown/HTML: Fix HTTP links containing Markdown formatting
|
Loading…
Reference in a new issue