RichTextParser.swift 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
//
// RichTextParser.swift
// RichTextView
//
// Created by Ahmed Elkady on 2018-11-08.
// Copyright © 2018 Top Hat. All rights reserved.
//
  8. import Down
  9. import UIKit
  10. class RichTextParser {
  11. // MARK: - Constants
  /// Static configuration used by `RichTextParser`: tag names, regular-expression patterns,
  /// text-attachment placeholder pieces and a few list/LaTeX related literals.
  enum ParserConstants {
    /// Result of a parsing step: the attributed output plus the errors raised (`nil` when there were none).
    typealias RichTextWithErrors = (output: NSAttributedString, errors: [ParsingError]?)

    // Tag names of the bracket syntax, e.g. `[math]...[/math]`.
    static let mathTagName = "math"
    static let interactiveElementTagName = "interactive-element"
    static let highlightedElementTagName = "highlighted-element"

    /// Pattern matching `[math]...[/math]`, with the content between the tags captured lazily.
    static let latexRegex = "\\[\(ParserConstants.mathTagName)\\](.*?)\\[\\/\(ParserConstants.mathTagName)\\]"
    /// Capture group read from every LaTeX match; group 0 is the whole match, tags included.
    static let latexRegexCaptureGroupIndex = 0
    /// Pattern matching `[interactive-element id=...]...[/interactive-element]`.
    static let interactiveElementRegex =
      "\\[\(ParserConstants.interactiveElementTagName)\\sid=.+?\\].*?\\[\\/\(ParserConstants.interactiveElementTagName)\\]"
    /// Pattern matching `[highlighted-element id=...]...[/highlighted-element]`.
    static let highlightedElementRegex =
      "\\[\(ParserConstants.highlightedElementTagName)\\sid=.+?\\].*?\\[\\/\(ParserConstants.highlightedElementTagName)\\]"

    // Pieces of the textual placeholder that stands in for a text attachment:
    // `{RichTextView-TextAttachmentPosition=<index>}`.
    private static let tAPlaceholderPrefix = "{RichTextView-TextAttachmentPosition"
    private static let tAPlaceholderSuffix = "}"
    static let textAttachmentPlaceholderAssigner = "="
    /// Pattern that finds a placeholder (with its numeric index) in a string.
    static let textAttachmentPlaceholderRegex =
      "\\\(ParserConstants.tAPlaceholderPrefix)\(ParserConstants.textAttachmentPlaceholderAssigner)[0-9]+?\\\(ParserConstants.tAPlaceholderSuffix)"
    /// `String(format:)` template producing a placeholder; `%d` receives the attachment index.
    static let textAttachmentPlaceholder =
      "\(ParserConstants.tAPlaceholderPrefix)\(ParserConstants.textAttachmentPlaceholderAssigner)%d\(ParserConstants.tAPlaceholderSuffix)"

    /// Character searched for when custom bullet attributes are applied.
    static let bulletString = "•"
    static let listOpeningHTMLString = "</style></head><body><ul"
    static let listClosingHTMLString = "</ul></body></html>"
    static let latexSubscriptCharacter = "_"
    static let defaultSubScriptOffset: CGFloat = 2.66
    /// Key into `customAdditionalAttributes` holding the attributes applied to bullet characters.
    static let bulletCustomAttributeIdentifier = "bullets"
  }
  39. // MARK: - Dependencies
  /// Converts `[math]` content into attributed strings.
  let latexParser: LatexParserProtocol
  /// Font applied to the whole parsed output; its point size is also handed to the LaTeX parser.
  let font: UIFont
  /// Colour applied to the whole parsed output and handed to the LaTeX parser.
  let textColor: UIColor
  /// Baseline offset forwarded to the LaTeX parser.
  let latexTextBaselineOffset: CGFloat
  /// Colour intended for interactive elements (stored here; not read by the parsing code in this class body).
  let interactiveTextColor: UIColor
  /// Extra attributes keyed by identifier: the `"bullets"` key styles bullet characters,
  /// and highlighted-element ids select the attributes applied to that element.
  let customAdditionalAttributes: [String: [NSAttributedString.Key: Any]]?

  // MARK: - Init

  /// Creates a parser; every dependency has a default so `RichTextParser()` is usable as-is.
  init(latexParser: LatexParserProtocol = LatexParser(),
       font: UIFont = .systemFont(ofSize: UIFont.systemFontSize),
       textColor: UIColor = .black,
       latexTextBaselineOffset: CGFloat = 0,
       interactiveTextColor: UIColor = .blue,
       customAdditionalAttributes: [String: [NSAttributedString.Key: Any]]? = nil) {
    self.latexParser = latexParser
    self.font = font
    self.textColor = textColor
    self.latexTextBaselineOffset = latexTextBaselineOffset
    self.interactiveTextColor = interactiveTextColor
    self.customAdditionalAttributes = customAdditionalAttributes
  }
  60. // MARK: - Multi-Purpose Functions
  /// Splits `input` around video tags and turns every piece into a `RichDataType`.
  ///
  /// Video tags become `.video` values; everything else is parsed into rich text.
  /// Parsing errors accumulate across pieces, so each `.text` value carries the errors
  /// of all text pieces parsed up to and including itself.
  ///
  /// - Parameter input: Raw text to parse.
  /// - Returns: One value per piece, in input order. An empty input yields a single empty `.text`.
  func getRichDataTypes(from input: String) -> [RichDataType] {
    guard !input.isEmpty else {
      return [RichDataType.text(richText: NSAttributedString(string: ""), font: self.font, errors: nil)]
    }
    var accumulatedErrors: [ParsingError]?
    return self.splitInputOnVideoPortions(input).map { component -> RichDataType in
      guard !self.isStringAVideoTag(component) else {
        return RichDataType.video(tag: component, error: nil)
      }
      let parsed = self.getRichTextWithErrors(from: component)
      if let newErrors = parsed.errors {
        accumulatedErrors = (accumulatedErrors ?? []) + newErrors
      }
      return RichDataType.text(richText: parsed.output, font: self.font, errors: accumulatedErrors)
    }
  }
  /// Parses `input` into a single attributed string.
  ///
  /// Steps: convert `[code]` tags to backticks, handle the special bracket tags, swap text
  /// attachments for textual placeholders, run the Markdown/HTML pass, merge both results,
  /// trim trailing whitespace and finally apply the parser's font and text colour.
  ///
  /// - Parameter input: Raw text to parse.
  /// - Returns: The attributed output and the errors of both passes (`nil` when neither reported any).
  func getRichTextWithErrors(from input: String) -> ParserConstants.RichTextWithErrors {
    let sanitizedInput = self.stripCodeTagsIfNecessary(from: input)
    let specialDataResult = self.getRichTextWithSpecialDataTypesHandled(
      fromString: NSMutableAttributedString(string: sanitizedInput)
    )
    let specialDataOutput = specialDataResult.output

    // Remember the attachment attributes first; the placeholders refer back to them by index.
    let attachmentAttributes = self.extractTextAttachmentAttributesInOrder(fromAttributedString: specialDataOutput)
    let placeholderText = self.replaceTextAttachmentsWithPlaceHolderInfo(inAttributedString: specialDataOutput)
    let htmlMarkdownResult = self.getRichTextWithHTMLAndMarkdownHandled(fromString: placeholderText)

    let outputRichText = self.mergeSpecialDataAndHTMLMarkdownAttribute(
      htmlMarkdownString: NSMutableAttributedString(attributedString: htmlMarkdownResult.output),
      specialDataTypesString: specialDataOutput,
      textAttachmentAttributes: attachmentAttributes
    ).trimmingTrailingNewlinesAndWhitespaces()
    outputRichText.replaceFont(with: self.font)
    outputRichText.replaceColor(with: self.textColor)

    let specialDataErrors = specialDataResult.errors
    let htmlMarkdownErrors = htmlMarkdownResult.errors
    guard specialDataErrors != nil || htmlMarkdownErrors != nil else {
      return (outputRichText, nil)
    }
    let combinedErrors = (specialDataErrors ?? [ParsingError]()) + (htmlMarkdownErrors ?? [ParsingError]())
    return (outputRichText, combinedErrors)
  }
  /// Combines the Markdown/HTML output with the attributes produced by the special-data pass.
  ///
  /// Text attachments are restored first. Then, for every attributed run of
  /// `specialDataTypesString` that is not an attachment, the first occurrence of the run's
  /// text in the output receives the run's attributes.
  ///
  /// - Parameters:
  ///   - htmlMarkdownString: Output of the Markdown/HTML pass; modified in place.
  ///   - specialDataTypesString: Output of the special-data pass.
  ///   - textAttachmentAttributes: Attachment attributes, indexed by placeholder position.
  /// - Returns: The merged attributed string.
  private func mergeSpecialDataAndHTMLMarkdownAttribute(htmlMarkdownString: NSMutableAttributedString,
                                                        specialDataTypesString: NSAttributedString,
                                                        textAttachmentAttributes: [[NSAttributedString.Key: Any]]) -> NSMutableAttributedString {
    let outputString = self.mergeTextAttachmentsAndHTMLMarkdownAttributes(
      htmlMarkdownString: htmlMarkdownString,
      textAttachmentAttributes: textAttachmentAttributes
    )
    let rangeOfSpecialDataString = NSRange(location: 0, length: specialDataTypesString.length)
    specialDataTypesString.enumerateAttributes(in: rangeOfSpecialDataString) { (attributes, range, _) in
      guard !attributes.isEmpty, attributes[.attachment] == nil else {
        return
      }
      // `range` is expressed in UTF-16 units, so the run's text is taken through the
      // attributed string itself rather than through Character offsets into the Swift
      // string (the two disagree as soon as the text contains emoji or other non-BMP characters).
      let specialDataSubstring = specialDataTypesString.attributedSubstring(from: range).string
      let rangeOfSubstringInOutputString = (outputString.string as NSString).range(of: specialDataSubstring)
      guard rangeOfSubstringInOutputString.location != NSNotFound,
        NSMaxRange(rangeOfSubstringInOutputString) <= outputString.length else {
          return
      }
      let newOutputSubstring = NSMutableAttributedString(
        attributedString: outputString.attributedSubstring(from: rangeOfSubstringInOutputString)
      )
      let rangeOfNewOutputSubstring = NSRange(location: 0, length: newOutputSubstring.length)
      newOutputSubstring.addAttributes(attributes, range: rangeOfNewOutputSubstring)
      // Re-inserting the plain text makes the whole run share the attributes of its first character.
      newOutputSubstring.replaceCharacters(in: rangeOfNewOutputSubstring, with: specialDataSubstring)
      outputString.replaceCharacters(in: rangeOfSubstringInOutputString, with: newOutputSubstring)
    }
    return outputString
  }
  /// Replaces every attachment placeholder in `htmlMarkdownString` with the real attachment.
  ///
  /// The number inside a placeholder is an index into `textAttachmentAttributes`. Placeholders
  /// whose index is missing, out of range, or whose attributes hold no `NSTextAttachment`
  /// are left untouched.
  ///
  /// - Parameters:
  ///   - htmlMarkdownString: String containing placeholders; modified in place.
  ///   - textAttachmentAttributes: Attachment attributes, indexed by placeholder position.
  /// - Returns: `htmlMarkdownString` after the substitutions.
  private func mergeTextAttachmentsAndHTMLMarkdownAttributes(htmlMarkdownString: NSMutableAttributedString,
                                                             textAttachmentAttributes: [[NSAttributedString.Key: Any]]) -> NSMutableAttributedString {
    guard let placeholderRegex = try? NSRegularExpression(pattern: ParserConstants.textAttachmentPlaceholderRegex, options: []) else {
      return htmlMarkdownString
    }
    let searchRange = NSRange(location: 0, length: htmlMarkdownString.length)
    let placeholderMatches = placeholderRegex.matches(in: htmlMarkdownString.string, options: [], range: searchRange)
    let nonDigitCharacters = CharacterSet.decimalDigits.inverted

    // Walk backwards so earlier match ranges stay valid while the string shrinks.
    for placeholder in placeholderMatches.reversed() {
      let placeholderText = htmlMarkdownString.attributedSubstring(from: placeholder.range).string
      let assignedValue = placeholderText
        .components(separatedBy: ParserConstants.textAttachmentPlaceholderAssigner)
        .last
      guard let positionDigits = assignedValue?.components(separatedBy: nonDigitCharacters).joined(),
        let position = Int(positionDigits),
        textAttachmentAttributes.indices.contains(position) else {
          continue
      }
      let storedAttributes = textAttachmentAttributes[position]
      guard let attachment = storedAttributes[.attachment] as? NSTextAttachment else {
        continue
      }
      let attachmentString = NSMutableAttributedString(attachment: attachment)
      attachmentString.addAttributes(storedAttributes, range: NSRange(location: 0, length: attachmentString.length))
      htmlMarkdownString.replaceCharacters(in: placeholder.range, with: attachmentString)
    }
    return htmlMarkdownString
  }
  161. // MARK: - Boolean Checkers
  /// Returns `true` when `text` contains at least one `[math]...[/math]` section.
  func isTextLatex(_ text: String) -> Bool {
    let latexRanges = self.getLatexRanges(inText: text)
    return latexRanges.isEmpty == false
  }
  /// Returns `true` when `text` contains at least one interactive-element tag pair.
  func isTextInteractiveElement(_ text: String) -> Bool {
    let interactiveRanges = text.ranges(of: ParserConstants.interactiveElementRegex, options: .regularExpression)
    return !interactiveRanges.isEmpty
  }
  /// Returns `true` when `text` contains at least one highlighted-element tag pair.
  func isTextHighlightedElement(_ text: String) -> Bool {
    let highlightedRanges = text.ranges(of: ParserConstants.highlightedElementRegex, options: .regularExpression)
    return !highlightedRanges.isEmpty
  }
  /// Returns `true` when `input` contains a match for the video tag pattern.
  private func isStringAVideoTag(_ input: String) -> Bool {
    let videoTagRange = input.range(of: RichTextViewConstants.videoTagRegex, options: .regularExpression)
    return videoTagRange != nil
  }
  174. // MARK: - Video Functions
  /// Breaks `input` into components using the video tag pattern as the separator.
  private func splitInputOnVideoPortions(_ input: String) -> [String] {
    let videoTagPattern = RichTextViewConstants.videoTagRegex
    return input.getComponents(separatedBy: videoTagPattern)
  }
  178. // MARK: - HTML/Markdown Helpers
  /// Runs the Markdown and HTML pass over the plain text of `mutableAttributedString`.
  ///
  /// The text is rendered from Markdown to HTML with Down, cleaned up, and parsed into an
  /// attributed string; custom bullet styling is applied to the result.
  ///
  /// - Parameter mutableAttributedString: Input whose `string` is parsed.
  /// - Returns: The parsed output with `nil` errors, or — when any step fails — the trimmed
  ///   input together with an `attributedTextGeneration` error.
  private func getRichTextWithHTMLAndMarkdownHandled(fromString mutableAttributedString: NSMutableAttributedString) -> ParserConstants.RichTextWithErrors {
    let rawText = mutableAttributedString.string

    // Shared result for every failure path.
    func generationFailure() -> ParserConstants.RichTextWithErrors {
      return (
        mutableAttributedString.trimmingTrailingNewlinesAndWhitespaces(),
        [ParsingError.attributedTextGeneration(text: rawText)]
      )
    }

    let textWithNonBreakingEdges = rawText
      .replaceTrailingWhiteSpaceWithNonBreakingSpace()
      .replaceLeadingWhiteSpaceWithNonBreakingSpace()
    let markdownSource = self.removeCommonEditorTags(from: textWithNonBreakingEdges)

    guard let renderedHTML = try? Down(markdownString: markdownSource).toHTML([.unsafe, .hardBreaks]),
      let cleanedHTML = renderedHTML.replaceAppropiateZeroWidthSpaces(),
      let htmlData = self.unescapeHTML(from: cleanedHTML).data(using: .utf8),
      let parsedHTML = self.getParsedHTMLAttributedString(fromData: htmlData) else {
        return generationFailure()
    }

    let styledOutput = self.addCustomStylingToBulletPointsIfNecessary(
      NSMutableAttributedString(attributedString: parsedHTML)
    )
    return (styledOutput, nil)
  }
  /// Parses UTF-8 encoded HTML `data` into an attributed string.
  ///
  /// The parse always happens on the main thread: directly when already there, otherwise
  /// through a synchronous hop to the main queue (presumably because the HTML importer
  /// is documented as unsafe to call from background threads).
  ///
  /// - Parameter data: HTML document bytes.
  /// - Returns: The parsed attributed string, or `nil` when parsing throws.
  private func getParsedHTMLAttributedString(fromData data: Data) -> NSAttributedString? {
    let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
      .documentType: NSAttributedString.DocumentType.html,
      .characterEncoding: String.Encoding.utf8.rawValue
    ]
    let parseHTML: () -> NSAttributedString? = {
      return try? NSAttributedString(data: data, options: readingOptions, documentAttributes: nil)
    }
    guard !Thread.isMainThread else {
      return parseHTML()
    }
    return DispatchQueue.main.sync { parseHTML() }
  }
  /// Applies the custom `"bullets"` attributes to every bullet character in `input`.
  ///
  /// - Parameter input: Attributed string to style; modified in place.
  /// - Returns: `input` itself, unchanged when no bullet attributes are configured
  ///   or the bullet pattern cannot be compiled.
  private func addCustomStylingToBulletPointsIfNecessary(_ input: NSMutableAttributedString) -> NSMutableAttributedString {
    guard let customBulletAttributes = self.customAdditionalAttributes?[ParserConstants.bulletCustomAttributeIdentifier],
      let bulletPointRegex = try? NSRegularExpression(pattern: ParserConstants.bulletString, options: []) else {
        return input
    }
    // The search range must be measured in UTF-16 units (`length`). A Character count is
    // shorter whenever the text holds multi-unit characters and would skip trailing bullets.
    let searchRange = NSRange(location: 0, length: input.length)
    let bulletPointMatches = bulletPointRegex.matches(in: input.string, options: [], range: searchRange)
    for match in bulletPointMatches {
      input.addAttributes(customBulletAttributes, range: match.range)
    }
    return input
  }
  /// Converts `[code]` / `[/code]` tags into Markdown backticks.
  private func stripCodeTagsIfNecessary(from input: String) -> String {
    let codeTags = ["[code]", "[/code]"]
    return codeTags.reduce(input) { $0.replacingOccurrences(of: $1, with: "`") }
  }
  /// Deletes the `<p id="">` opening tags and `</p>` closing tags left behind by editors.
  private func removeCommonEditorTags(from input: String) -> String {
    let editorTags = ["<p id=\"\">", "</p>"]
    return editorTags.reduce(input) { $0.replacingOccurrences(of: $1, with: "") }
  }
  /// Replaces a fixed set of HTML entities with the characters they stand for.
  ///
  /// Handled entities: `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&nbsp;` (to a plain space) and `&amp;`.
  ///
  /// - Parameter input: Text that may contain HTML entities.
  /// - Returns: `input` with each listed entity unescaped exactly once.
  private func unescapeHTML(from input: String) -> String {
    // `&amp;` has to be handled last: unescaping it first would turn `&amp;lt;` into
    // `&lt;`, which the next replacement would then unescape a second time.
    return input.replacingOccurrences(of: "&lt;", with: "<")
      .replacingOccurrences(of: "&gt;", with: ">")
      .replacingOccurrences(of: "&quot;", with: "\"")
      .replacingOccurrences(of: "&#39;", with: "'")
      .replacingOccurrences(of: "&nbsp;", with: " ")
      .replacingOccurrences(of: "&amp;", with: "&")
  }
  241. // MARK: - String Helpers
  /// Splits an attributed string at the given positions, keeping each piece's attributes.
  ///
  /// Pieces are located from left to right, each search starting where the previous piece
  /// ended, so a piece whose text also occurs earlier in the string still gets its own
  /// attributes. A piece that cannot be located ahead of the cursor falls back to a
  /// whole-string search, and if that fails too it is returned as plain text.
  ///
  /// - Parameters:
  ///   - mutableAttributedString: String to split.
  ///   - positions: Split positions, as indices into `mutableAttributedString.string`.
  /// - Returns: The pieces, in the order produced by `String.split(atPositions:)`.
  private func split(mutableAttributedString: NSMutableAttributedString, onPositions positions: [String.Index]) -> [NSAttributedString] {
    let splitStrings = mutableAttributedString.string.split(atPositions: positions)
    let fullString = mutableAttributedString.string as NSString
    var output = [NSAttributedString]()
    var searchStart = 0
    for string in splitStrings {
      let remainingRange = NSRange(location: searchStart, length: fullString.length - searchStart)
      let forwardRange = fullString.range(of: string, options: [], range: remainingRange)
      if forwardRange.location != NSNotFound {
        output.append(mutableAttributedString.attributedSubstring(from: forwardRange))
        searchStart = NSMaxRange(forwardRange)
        continue
      }
      // Not ahead of the cursor (e.g. pieces delivered out of order): search everywhere.
      let fallbackRange = fullString.range(of: string)
      if fallbackRange.location != NSNotFound {
        output.append(mutableAttributedString.attributedSubstring(from: fallbackRange))
      } else {
        // Asking for a not-found range would raise, so keep the piece as plain text.
        output.append(NSAttributedString(string: string))
      }
    }
    return output
  }
  /// Flattens `ranges` into the sorted list of all their lower and upper bounds.
  private func extractPositions(fromRanges ranges: [Range<String.Index>]) -> [String.Index] {
    var boundaries = [String.Index]()
    for range in ranges {
      boundaries.append(range.lowerBound)
      boundaries.append(range.upperBound)
    }
    boundaries.sort()
    return boundaries
  }
  255. // MARK: - Text Attachment Functions
  /// Collects the attribute dictionaries of all runs in `input` that carry an attachment.
  ///
  /// Runs are visited from the end of the string towards the start, so index 0 of the
  /// result belongs to the last attachment in the text.
  private func extractTextAttachmentAttributesInOrder(fromAttributedString input: NSAttributedString) -> [[NSAttributedString.Key: Any]] {
    var attachmentAttributes = [[NSAttributedString.Key: Any]]()
    let fullRange = NSRange(location: 0, length: input.length)
    input.enumerateAttributes(in: fullRange, options: [.reverse]) { (runAttributes, _, _) in
      if runAttributes[.attachment] != nil {
        attachmentAttributes.append(runAttributes)
      }
    }
    return attachmentAttributes
  }
  /// Returns a copy of `input` in which every attachment run is replaced by a textual placeholder.
  ///
  /// Runs are visited from the end of the string towards the start and numbered from 0 in
  /// that order, matching the order used by `extractTextAttachmentAttributesInOrder`.
  private func replaceTextAttachmentsWithPlaceHolderInfo(inAttributedString input: NSAttributedString) -> NSMutableAttributedString {
    let result = NSMutableAttributedString(attributedString: input)
    var nextPosition = 0
    // Enumerate the immutable input while editing the copy; going backwards keeps
    // the not-yet-visited ranges valid in the copy.
    input.enumerateAttributes(in: NSRange(location: 0, length: input.length), options: [.reverse]) { (runAttributes, runRange, _) in
      if runAttributes[.attachment] == nil {
        return
      }
      let placeholder = String(format: ParserConstants.textAttachmentPlaceholder, nextPosition)
      result.replaceCharacters(in: runRange, with: placeholder)
      nextPosition += 1
    }
    return result
  }
  280. // MARK: - Special Data Type Helpers
  /// Finds every `[math]...[/math]` section in `text`.
  ///
  /// Matching is case-insensitive and lets `.` span line breaks.
  ///
  /// - Parameter text: Text to search.
  /// - Returns: The range of each match (tags included), in order of appearance;
  ///   empty when the pattern cannot be compiled.
  private func getLatexRanges(inText text: String) -> [Range<String.Index>] {
    guard let regex = try? NSRegularExpression(pattern: ParserConstants.latexRegex, options: [.caseInsensitive, .dotMatchesLineSeparators]) else {
      return []
    }
    // Build the search range from string indices so it is measured in UTF-16 units.
    // A Character count is shorter whenever the text holds multi-unit characters and
    // would stop the search before the end of the text.
    let searchRange = NSRange(text.startIndex..<text.endIndex, in: text)
    return regex.matches(in: text, options: [], range: searchRange).compactMap { match in
      return Range<String.Index>(match.range(at: ParserConstants.latexRegexCaptureGroupIndex), in: text)
    }
  }
  /// Measures the height of the bounding rect of an empty string drawn with the parser's font.
  private func calculateContentHeight() -> CGFloat {
    let measuringAttributes: [NSAttributedString.Key: Any] = [.font: self.font]
    let boundingFrame = ("" as NSString).boundingRect(
      with: CGSize(width: 0, height: .max),
      options: [.usesFontLeading, .usesLineFragmentOrigin],
      attributes: measuringAttributes,
      context: nil
    )
    return boundingFrame.size.height
  }
  /// Handles the bracket tags (interactive elements, highlighted elements, LaTeX) in the input.
  ///
  /// - Parameter mutableAttributedString: Input to scan.
  /// - Returns: The trimmed input with `nil` errors when no tag is present; otherwise the
  ///   input split at every tag boundary and re-assembled with each tag converted.
  private func getRichTextWithSpecialDataTypesHandled(fromString mutableAttributedString: NSMutableAttributedString) -> ParserConstants.RichTextWithErrors {
    let rawText = mutableAttributedString.string
    let interactiveElementPositions = self.extractPositions(
      fromRanges: rawText.ranges(of: ParserConstants.interactiveElementRegex, options: .regularExpression)
    )
    let highlightedElementPositions = self.extractPositions(
      fromRanges: rawText.ranges(of: ParserConstants.highlightedElementRegex, options: .regularExpression)
    )
    let latexPositions = self.extractPositions(fromRanges: self.getLatexRanges(inText: rawText))

    let splitPositions = interactiveElementPositions + latexPositions + highlightedElementPositions
    guard !splitPositions.isEmpty else {
      return (mutableAttributedString.trimmingTrailingNewlinesAndWhitespaces(), nil)
    }
    let components = self.split(mutableAttributedString: mutableAttributedString, onPositions: splitPositions)
    return self.mergeSpecialDataComponentsAndReturnRichText(components)
  }
  /// Converts each component according to the tag it contains and concatenates the results.
  ///
  /// Interactive elements are checked first, then highlighted elements, then LaTeX.
  /// A LaTeX component that cannot be converted is kept verbatim and reported as a
  /// `latexGeneration` error. Components without a tag are appended unchanged.
  ///
  /// - Parameter components: Pieces of the input, in order.
  /// - Returns: The trimmed concatenation and the collected errors (`nil` when there were none).
  private func mergeSpecialDataComponentsAndReturnRichText(_ components: [NSAttributedString]) -> ParserConstants.RichTextWithErrors {
    let merged = NSMutableAttributedString()
    var latexErrors: [ParsingError]?
    for component in components {
      let componentText = component.string
      if self.isTextInteractiveElement(componentText) {
        merged.append(self.extractInteractiveElement(from: component))
      } else if self.isTextHighlightedElement(componentText) {
        merged.append(self.extractHighlightedElement(from: component))
      } else if self.isTextLatex(componentText) {
        if let renderedLatex = self.extractLatex(from: componentText) {
          merged.append(renderedLatex)
        } else {
          merged.append(component)
          latexErrors = (latexErrors ?? [ParsingError]()) + [ParsingError.latexGeneration(text: componentText)]
        }
      } else {
        merged.append(component)
      }
    }
    return (merged.trimmingTrailingNewlinesAndWhitespaces(), latexErrors)
  }
  /// Asks the LaTeX parser to render `input`, passing along the parser's text colour,
  /// baseline offset, font size and measured content height.
  ///
  /// - Returns: Whatever the LaTeX parser returns; `nil` is treated as a failure by callers in this class.
  func extractLatex(from input: String) -> NSAttributedString? {
    let contentHeight = self.calculateContentHeight()
    let pointSize = self.font.pointSize
    return self.latexParser.extractLatex(
      from: input,
      textColor: self.textColor,
      baselineOffset: self.latexTextBaselineOffset,
      fontSize: pointSize,
      height: contentHeight
    )
  }
  /// Converts an `[interactive-element id=...]text[/interactive-element]` component into its
  /// text, attributed with a `.link` holding the element id.
  ///
  /// The attributes found at the start of `input` are carried over; an existing `.link`
  /// never overrides the element id. When the id or the text cannot be extracted, the
  /// whole input string is used in its place.
  ///
  /// - Parameter input: Component containing the interactive element.
  /// - Returns: The element text with the merged attributes.
  func extractInteractiveElement(from input: NSAttributedString) -> NSMutableAttributedString {
    let interactiveElementTagName = ParserConstants.interactiveElementTagName
    let interactiveElementID = input.string.getSubstring(inBetween: "[\(interactiveElementTagName) id=", and: "]") ?? input.string
    let interactiveElementText = input.string.getSubstring(inBetween: "]", and: "[/\(interactiveElementTagName)]") ?? input.string
    // Reading attributes at index 0 of an empty attributed string raises a range exception.
    let existingAttributes: [NSAttributedString.Key: Any] = input.length > 0
      ? input.attributes(at: 0, effectiveRange: nil)
      : [:]
    let attributes: [NSAttributedString.Key: Any] = [.link: interactiveElementID]
      .merging(existingAttributes) { (current, _) in current }
    return NSMutableAttributedString(string: interactiveElementText, attributes: attributes)
  }
  /// Converts a `[highlighted-element id=...]text[/highlighted-element]` component into its
  /// text, attributed with `.highlight` plus the custom attributes registered for the id.
  ///
  /// When no custom attributes are registered for the id, the plain element text is returned.
  /// Otherwise the merge order is: the `.highlight` id, then the attributes found at the
  /// start of `input`, then the custom attributes — earlier values win on conflicts.
  /// When the id or the text cannot be extracted, the whole input string is used in its place.
  ///
  /// - Parameter input: Component containing the highlighted element.
  /// - Returns: The element text, attributed as described above.
  func extractHighlightedElement(from input: NSAttributedString) -> NSMutableAttributedString {
    let highlightedElementTagName = ParserConstants.highlightedElementTagName
    let highlightedElementID = input.string.getSubstring(inBetween: "[\(highlightedElementTagName) id=", and: "]") ?? input.string
    let highlightedElementText = input.string.getSubstring(inBetween: "]", and: "[/\(highlightedElementTagName)]") ?? input.string
    guard let richTextAttributes = self.customAdditionalAttributes?[highlightedElementID] else {
      return NSMutableAttributedString(string: highlightedElementText)
    }
    // Reading attributes at index 0 of an empty attributed string raises a range exception.
    let existingAttributes: [NSAttributedString.Key: Any] = input.length > 0
      ? input.attributes(at: 0, effectiveRange: nil)
      : [:]
    let attributes: [NSAttributedString.Key: Any] = [.highlight: highlightedElementID]
      .merging(existingAttributes) { (current, _) in current }
      .merging(richTextAttributes) { (current, _) in current }
    return NSMutableAttributedString(string: highlightedElementText, attributes: attributes)
  }
  375. }