html.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "cmark_ctype.h"
  6. #include "config.h"
  7. #include "cmark.h"
  8. #include "node.h"
  9. #include "buffer.h"
  10. #include "houdini.h"
  11. #include "scanners.h"
  // Capacity, in bytes, of the on-stack scratch buffers that S_render_sourcepos
  // and S_render_node fill with snprintf before appending to the output.
  12. #define BUFFER_SIZE 100
  13. // Functions to convert cmark_nodes to HTML strings.
  14. static void escape_html(cmark_strbuf *dest, const unsigned char *source,
  15. bufsize_t length) {
  16. houdini_escape_html0(dest, source, length, 0);
  17. }
  18. static CMARK_INLINE void cr(cmark_strbuf *html) {
  19. if (html->size && html->ptr[html->size - 1] != '\n')
  20. cmark_strbuf_putc(html, '\n');
  21. }
  // State threaded through every S_render_node call during one rendering pass.
  // cmark_render_html initializes it positionally as {&html, NULL}, so the
  // field order matters.
  22. struct render_state {
  23. cmark_strbuf *html; // buffer that receives the generated HTML
  // Image node whose contents are currently being written as alt text, or
  // NULL. While set, S_render_node emits only escaped literal text (and a
  // space for line breaks); it is cleared when iteration reaches that node
  // again.
  24. cmark_node *plain;
  25. };
  26. static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html,
  27. int options) {
  28. char buffer[BUFFER_SIZE];
  29. if (CMARK_OPT_SOURCEPOS & options) {
  30. snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
  31. cmark_node_get_start_line(node), cmark_node_get_start_column(node),
  32. cmark_node_get_end_line(node), cmark_node_get_end_column(node));
  33. cmark_strbuf_puts(html, buffer);
  34. }
  35. }
  36. static int S_render_node(cmark_node *node, cmark_event_type ev_type,
  37. struct render_state *state, int options) {
  38. cmark_node *parent;
  39. cmark_node *grandparent;
  40. cmark_strbuf *html = state->html;
  41. char start_heading[] = "<h0";
  42. char end_heading[] = "</h0";
  43. bool tight;
  44. char buffer[BUFFER_SIZE];
  45. bool entering = (ev_type == CMARK_EVENT_ENTER);
  46. if (state->plain == node) { // back at original node
  47. state->plain = NULL;
  48. }
  49. if (state->plain != NULL) {
  50. switch (node->type) {
  51. case CMARK_NODE_TEXT:
  52. case CMARK_NODE_CODE:
  53. case CMARK_NODE_HTML_INLINE:
  54. escape_html(html, node->as.literal.data, node->as.literal.len);
  55. break;
  56. case CMARK_NODE_LINEBREAK:
  57. case CMARK_NODE_SOFTBREAK:
  58. cmark_strbuf_putc(html, ' ');
  59. break;
  60. default:
  61. break;
  62. }
  63. return 1;
  64. }
  65. switch (node->type) {
  66. case CMARK_NODE_DOCUMENT:
  67. break;
  68. case CMARK_NODE_BLOCK_QUOTE:
  69. if (entering) {
  70. cr(html);
  71. cmark_strbuf_puts(html, "<blockquote");
  72. S_render_sourcepos(node, html, options);
  73. cmark_strbuf_puts(html, ">\n");
  74. } else {
  75. cr(html);
  76. cmark_strbuf_puts(html, "</blockquote>\n");
  77. }
  78. break;
  79. case CMARK_NODE_LIST: {
  80. cmark_list_type list_type = node->as.list.list_type;
  81. int start = node->as.list.start;
  82. if (entering) {
  83. cr(html);
  84. if (list_type == CMARK_BULLET_LIST) {
  85. cmark_strbuf_puts(html, "<ul");
  86. S_render_sourcepos(node, html, options);
  87. cmark_strbuf_puts(html, ">\n");
  88. } else if (start == 1) {
  89. cmark_strbuf_puts(html, "<ol");
  90. S_render_sourcepos(node, html, options);
  91. cmark_strbuf_puts(html, ">\n");
  92. } else {
  93. snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
  94. cmark_strbuf_puts(html, buffer);
  95. S_render_sourcepos(node, html, options);
  96. cmark_strbuf_puts(html, ">\n");
  97. }
  98. } else {
  99. cmark_strbuf_puts(html,
  100. list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
  101. }
  102. break;
  103. }
  104. case CMARK_NODE_ITEM:
  105. if (entering) {
  106. cr(html);
  107. cmark_strbuf_puts(html, "<li");
  108. S_render_sourcepos(node, html, options);
  109. cmark_strbuf_putc(html, '>');
  110. } else {
  111. cmark_strbuf_puts(html, "</li>\n");
  112. }
  113. break;
  114. case CMARK_NODE_HEADING:
  115. if (entering) {
  116. cr(html);
  117. start_heading[2] = (char)('0' + node->as.heading.level);
  118. cmark_strbuf_puts(html, start_heading);
  119. S_render_sourcepos(node, html, options);
  120. cmark_strbuf_putc(html, '>');
  121. } else {
  122. end_heading[3] = (char)('0' + node->as.heading.level);
  123. cmark_strbuf_puts(html, end_heading);
  124. cmark_strbuf_puts(html, ">\n");
  125. }
  126. break;
  127. case CMARK_NODE_CODE_BLOCK:
  128. cr(html);
  129. if (node->as.code.info.len == 0) {
  130. cmark_strbuf_puts(html, "<pre");
  131. S_render_sourcepos(node, html, options);
  132. cmark_strbuf_puts(html, "><code>");
  133. } else {
  134. bufsize_t first_tag = 0;
  135. while (first_tag < node->as.code.info.len &&
  136. !cmark_isspace(node->as.code.info.data[first_tag])) {
  137. first_tag += 1;
  138. }
  139. cmark_strbuf_puts(html, "<pre");
  140. S_render_sourcepos(node, html, options);
  141. cmark_strbuf_puts(html, "><code class=\"language-");
  142. escape_html(html, node->as.code.info.data, first_tag);
  143. cmark_strbuf_puts(html, "\">");
  144. }
  145. escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
  146. cmark_strbuf_puts(html, "</code></pre>\n");
  147. break;
  148. case CMARK_NODE_HTML_BLOCK:
  149. cr(html);
  150. if (!(options & CMARK_OPT_UNSAFE)) {
  151. cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
  152. } else {
  153. cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
  154. }
  155. cr(html);
  156. break;
  157. case CMARK_NODE_CUSTOM_BLOCK:
  158. cr(html);
  159. if (entering) {
  160. cmark_strbuf_put(html, node->as.custom.on_enter.data,
  161. node->as.custom.on_enter.len);
  162. } else {
  163. cmark_strbuf_put(html, node->as.custom.on_exit.data,
  164. node->as.custom.on_exit.len);
  165. }
  166. cr(html);
  167. break;
  168. case CMARK_NODE_THEMATIC_BREAK:
  169. cr(html);
  170. cmark_strbuf_puts(html, "<hr");
  171. S_render_sourcepos(node, html, options);
  172. cmark_strbuf_puts(html, " />\n");
  173. break;
  174. case CMARK_NODE_PARAGRAPH:
  175. parent = cmark_node_parent(node);
  176. grandparent = cmark_node_parent(parent);
  177. if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
  178. tight = grandparent->as.list.tight;
  179. } else {
  180. tight = false;
  181. }
  182. if (!tight) {
  183. if (entering) {
  184. cr(html);
  185. cmark_strbuf_puts(html, "<p");
  186. S_render_sourcepos(node, html, options);
  187. cmark_strbuf_putc(html, '>');
  188. } else {
  189. cmark_strbuf_puts(html, "</p>\n");
  190. }
  191. }
  192. break;
  193. case CMARK_NODE_TEXT:
  194. escape_html(html, node->as.literal.data, node->as.literal.len);
  195. break;
  196. case CMARK_NODE_LINEBREAK:
  197. cmark_strbuf_puts(html, "<br />\n");
  198. break;
  199. case CMARK_NODE_SOFTBREAK:
  200. if (options & CMARK_OPT_HARDBREAKS) {
  201. cmark_strbuf_puts(html, "<br />\n");
  202. } else if (options & CMARK_OPT_NOBREAKS) {
  203. cmark_strbuf_putc(html, ' ');
  204. } else {
  205. cmark_strbuf_putc(html, '\n');
  206. }
  207. break;
  208. case CMARK_NODE_CODE:
  209. cmark_strbuf_puts(html, "<code>");
  210. escape_html(html, node->as.literal.data, node->as.literal.len);
  211. cmark_strbuf_puts(html, "</code>");
  212. break;
  213. case CMARK_NODE_HTML_INLINE:
  214. if (!(options & CMARK_OPT_UNSAFE)) {
  215. cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
  216. } else {
  217. cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
  218. }
  219. break;
  220. case CMARK_NODE_CUSTOM_INLINE:
  221. if (entering) {
  222. cmark_strbuf_put(html, node->as.custom.on_enter.data,
  223. node->as.custom.on_enter.len);
  224. } else {
  225. cmark_strbuf_put(html, node->as.custom.on_exit.data,
  226. node->as.custom.on_exit.len);
  227. }
  228. break;
  229. case CMARK_NODE_STRONG:
  230. if (entering) {
  231. cmark_strbuf_puts(html, "<strong>");
  232. } else {
  233. cmark_strbuf_puts(html, "</strong>");
  234. }
  235. break;
  236. case CMARK_NODE_EMPH:
  237. if (entering) {
  238. cmark_strbuf_puts(html, "<em>");
  239. } else {
  240. cmark_strbuf_puts(html, "</em>");
  241. }
  242. break;
  243. case CMARK_NODE_LINK:
  244. if (entering) {
  245. cmark_strbuf_puts(html, "<a href=\"");
  246. if ((options & CMARK_OPT_UNSAFE) ||
  247. !(scan_dangerous_url(&node->as.link.url, 0))) {
  248. houdini_escape_href(html, node->as.link.url.data,
  249. node->as.link.url.len);
  250. }
  251. if (node->as.link.title.len) {
  252. cmark_strbuf_puts(html, "\" title=\"");
  253. escape_html(html, node->as.link.title.data, node->as.link.title.len);
  254. }
  255. cmark_strbuf_puts(html, "\">");
  256. } else {
  257. cmark_strbuf_puts(html, "</a>");
  258. }
  259. break;
  260. case CMARK_NODE_IMAGE:
  261. if (entering) {
  262. cmark_strbuf_puts(html, "<img src=\"");
  263. if ((options & CMARK_OPT_UNSAFE) ||
  264. !(scan_dangerous_url(&node->as.link.url, 0))) {
  265. houdini_escape_href(html, node->as.link.url.data,
  266. node->as.link.url.len);
  267. }
  268. cmark_strbuf_puts(html, "\" alt=\"");
  269. state->plain = node;
  270. } else {
  271. if (node->as.link.title.len) {
  272. cmark_strbuf_puts(html, "\" title=\"");
  273. escape_html(html, node->as.link.title.data, node->as.link.title.len);
  274. }
  275. cmark_strbuf_puts(html, "\" />");
  276. }
  277. break;
  278. default:
  279. assert(false);
  280. break;
  281. }
  282. // cmark_strbuf_putc(html, 'x');
  283. return 1;
  284. }
  285. char *cmark_render_html(cmark_node *root, int options) {
  286. char *result;
  287. cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
  288. cmark_event_type ev_type;
  289. cmark_node *cur;
  290. struct render_state state = {&html, NULL};
  291. cmark_iter *iter = cmark_iter_new(root);
  292. while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
  293. cur = cmark_iter_get_node(iter);
  294. S_render_node(cur, ev_type, &state, options);
  295. }
  296. result = (char *)cmark_strbuf_detach(&html);
  297. cmark_iter_free(iter);
  298. return result;
  299. }