latex.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "config.h"
  6. #include "cmark.h"
  7. #include "node.h"
  8. #include "buffer.h"
  9. #include "utf8.h"
  10. #include "scanners.h"
  11. #include "render.h"
  12. #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
  13. #define LIT(s) renderer->out(renderer, s, false, LITERAL)
  14. #define CR() renderer->cr(renderer)
  15. #define BLANKLINE() renderer->blankline(renderer)
  16. #define LIST_NUMBER_STRING_SIZE 20
  17. static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
  18. int32_t c, unsigned char nextc) {
  19. if (escape == LITERAL) {
  20. cmark_render_code_point(renderer, c);
  21. return;
  22. }
  23. switch (c) {
  24. case 123: // '{'
  25. case 125: // '}'
  26. case 35: // '#'
  27. case 37: // '%'
  28. case 38: // '&'
  29. cmark_render_ascii(renderer, "\\");
  30. cmark_render_code_point(renderer, c);
  31. break;
  32. case 36: // '$'
  33. case 95: // '_'
  34. if (escape == NORMAL) {
  35. cmark_render_ascii(renderer, "\\");
  36. }
  37. cmark_render_code_point(renderer, c);
  38. break;
  39. case 45: // '-'
  40. if (nextc == 45) { // prevent ligature
  41. cmark_render_ascii(renderer, "-{}");
  42. } else {
  43. cmark_render_ascii(renderer, "-");
  44. }
  45. break;
  46. case 126: // '~'
  47. if (escape == NORMAL) {
  48. cmark_render_ascii(renderer, "\\textasciitilde{}");
  49. } else {
  50. cmark_render_code_point(renderer, c);
  51. }
  52. break;
  53. case 94: // '^'
  54. cmark_render_ascii(renderer, "\\^{}");
  55. break;
  56. case 92: // '\\'
  57. if (escape == URL) {
  58. // / acts as path sep even on windows:
  59. cmark_render_ascii(renderer, "/");
  60. } else {
  61. cmark_render_ascii(renderer, "\\textbackslash{}");
  62. }
  63. break;
  64. case 124: // '|'
  65. cmark_render_ascii(renderer, "\\textbar{}");
  66. break;
  67. case 60: // '<'
  68. cmark_render_ascii(renderer, "\\textless{}");
  69. break;
  70. case 62: // '>'
  71. cmark_render_ascii(renderer, "\\textgreater{}");
  72. break;
  73. case 91: // '['
  74. case 93: // ']'
  75. cmark_render_ascii(renderer, "{");
  76. cmark_render_code_point(renderer, c);
  77. cmark_render_ascii(renderer, "}");
  78. break;
  79. case 34: // '"'
  80. cmark_render_ascii(renderer, "\\textquotedbl{}");
  81. // requires \usepackage[T1]{fontenc}
  82. break;
  83. case 39: // '\''
  84. cmark_render_ascii(renderer, "\\textquotesingle{}");
  85. // requires \usepackage{textcomp}
  86. break;
  87. case 160: // nbsp
  88. cmark_render_ascii(renderer, "~");
  89. break;
  90. case 8230: // hellip
  91. cmark_render_ascii(renderer, "\\ldots{}");
  92. break;
  93. case 8216: // lsquo
  94. if (escape == NORMAL) {
  95. cmark_render_ascii(renderer, "`");
  96. } else {
  97. cmark_render_code_point(renderer, c);
  98. }
  99. break;
  100. case 8217: // rsquo
  101. if (escape == NORMAL) {
  102. cmark_render_ascii(renderer, "\'");
  103. } else {
  104. cmark_render_code_point(renderer, c);
  105. }
  106. break;
  107. case 8220: // ldquo
  108. if (escape == NORMAL) {
  109. cmark_render_ascii(renderer, "``");
  110. } else {
  111. cmark_render_code_point(renderer, c);
  112. }
  113. break;
  114. case 8221: // rdquo
  115. if (escape == NORMAL) {
  116. cmark_render_ascii(renderer, "''");
  117. } else {
  118. cmark_render_code_point(renderer, c);
  119. }
  120. break;
  121. case 8212: // emdash
  122. if (escape == NORMAL) {
  123. cmark_render_ascii(renderer, "---");
  124. } else {
  125. cmark_render_code_point(renderer, c);
  126. }
  127. break;
  128. case 8211: // endash
  129. if (escape == NORMAL) {
  130. cmark_render_ascii(renderer, "--");
  131. } else {
  132. cmark_render_code_point(renderer, c);
  133. }
  134. break;
  135. default:
  136. cmark_render_code_point(renderer, c);
  137. }
  138. }
  139. typedef enum {
  140. NO_LINK,
  141. URL_AUTOLINK,
  142. EMAIL_AUTOLINK,
  143. NORMAL_LINK,
  144. INTERNAL_LINK
  145. } link_type;
  146. static link_type get_link_type(cmark_node *node) {
  147. size_t title_len, url_len;
  148. cmark_node *link_text;
  149. char *realurl;
  150. int realurllen;
  151. bool isemail = false;
  152. if (node->type != CMARK_NODE_LINK) {
  153. return NO_LINK;
  154. }
  155. const char *url = cmark_node_get_url(node);
  156. cmark_chunk url_chunk = cmark_chunk_literal(url);
  157. if (url && *url == '#') {
  158. return INTERNAL_LINK;
  159. }
  160. url_len = strlen(url);
  161. if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
  162. return NO_LINK;
  163. }
  164. const char *title = cmark_node_get_title(node);
  165. title_len = strlen(title);
  166. // if it has a title, we can't treat it as an autolink:
  167. if (title_len == 0) {
  168. link_text = node->first_child;
  169. cmark_consolidate_text_nodes(link_text);
  170. if (!link_text)
  171. return NO_LINK;
  172. realurl = (char *)url;
  173. realurllen = (int)url_len;
  174. if (strncmp(realurl, "mailto:", 7) == 0) {
  175. realurl += 7;
  176. realurllen -= 7;
  177. isemail = true;
  178. }
  179. if (realurllen == link_text->as.literal.len &&
  180. strncmp(realurl, (char *)link_text->as.literal.data,
  181. link_text->as.literal.len) == 0) {
  182. if (isemail) {
  183. return EMAIL_AUTOLINK;
  184. } else {
  185. return URL_AUTOLINK;
  186. }
  187. }
  188. }
  189. return NORMAL_LINK;
  190. }
  191. static int S_get_enumlevel(cmark_node *node) {
  192. int enumlevel = 0;
  193. cmark_node *tmp = node;
  194. while (tmp) {
  195. if (tmp->type == CMARK_NODE_LIST &&
  196. cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) {
  197. enumlevel++;
  198. }
  199. tmp = tmp->parent;
  200. }
  201. return enumlevel;
  202. }
  203. static int S_render_node(cmark_renderer *renderer, cmark_node *node,
  204. cmark_event_type ev_type, int options) {
  205. int list_number;
  206. int enumlevel;
  207. char list_number_string[LIST_NUMBER_STRING_SIZE];
  208. bool entering = (ev_type == CMARK_EVENT_ENTER);
  209. cmark_list_type list_type;
  210. bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
  211. // avoid warning about unused parameter:
  212. (void)(options);
  213. switch (node->type) {
  214. case CMARK_NODE_DOCUMENT:
  215. break;
  216. case CMARK_NODE_BLOCK_QUOTE:
  217. if (entering) {
  218. LIT("\\begin{quote}");
  219. CR();
  220. } else {
  221. LIT("\\end{quote}");
  222. BLANKLINE();
  223. }
  224. break;
  225. case CMARK_NODE_LIST:
  226. list_type = cmark_node_get_list_type(node);
  227. if (entering) {
  228. LIT("\\begin{");
  229. LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
  230. LIT("}");
  231. CR();
  232. list_number = cmark_node_get_list_start(node);
  233. if (list_number > 1) {
  234. enumlevel = S_get_enumlevel(node);
  235. // latex normally supports only five levels
  236. if (enumlevel >= 1 && enumlevel <= 5) {
  237. snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
  238. list_number);
  239. LIT("\\setcounter{enum");
  240. switch (enumlevel) {
  241. case 1: LIT("i"); break;
  242. case 2: LIT("ii"); break;
  243. case 3: LIT("iii"); break;
  244. case 4: LIT("iv"); break;
  245. case 5: LIT("v"); break;
  246. default: LIT("i"); break;
  247. }
  248. LIT("}{");
  249. OUT(list_number_string, false, NORMAL);
  250. LIT("}");
  251. }
  252. CR();
  253. }
  254. } else {
  255. LIT("\\end{");
  256. LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
  257. LIT("}");
  258. BLANKLINE();
  259. }
  260. break;
  261. case CMARK_NODE_ITEM:
  262. if (entering) {
  263. LIT("\\item ");
  264. } else {
  265. CR();
  266. }
  267. break;
  268. case CMARK_NODE_HEADING:
  269. if (entering) {
  270. switch (cmark_node_get_heading_level(node)) {
  271. case 1:
  272. LIT("\\section");
  273. break;
  274. case 2:
  275. LIT("\\subsection");
  276. break;
  277. case 3:
  278. LIT("\\subsubsection");
  279. break;
  280. case 4:
  281. LIT("\\paragraph");
  282. break;
  283. case 5:
  284. LIT("\\subparagraph");
  285. break;
  286. }
  287. LIT("{");
  288. } else {
  289. LIT("}");
  290. BLANKLINE();
  291. }
  292. break;
  293. case CMARK_NODE_CODE_BLOCK:
  294. CR();
  295. LIT("\\begin{verbatim}");
  296. CR();
  297. OUT(cmark_node_get_literal(node), false, LITERAL);
  298. CR();
  299. LIT("\\end{verbatim}");
  300. BLANKLINE();
  301. break;
  302. case CMARK_NODE_HTML_BLOCK:
  303. break;
  304. case CMARK_NODE_CUSTOM_BLOCK:
  305. CR();
  306. OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
  307. false, LITERAL);
  308. CR();
  309. break;
  310. case CMARK_NODE_THEMATIC_BREAK:
  311. BLANKLINE();
  312. LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
  313. BLANKLINE();
  314. break;
  315. case CMARK_NODE_PARAGRAPH:
  316. if (!entering) {
  317. BLANKLINE();
  318. }
  319. break;
  320. case CMARK_NODE_TEXT:
  321. OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
  322. break;
  323. case CMARK_NODE_LINEBREAK:
  324. LIT("\\\\");
  325. CR();
  326. break;
  327. case CMARK_NODE_SOFTBREAK:
  328. if (options & CMARK_OPT_HARDBREAKS) {
  329. LIT("\\\\");
  330. CR();
  331. } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
  332. CR();
  333. } else {
  334. OUT(" ", allow_wrap, NORMAL);
  335. }
  336. break;
  337. case CMARK_NODE_CODE:
  338. LIT("\\texttt{");
  339. OUT(cmark_node_get_literal(node), false, NORMAL);
  340. LIT("}");
  341. break;
  342. case CMARK_NODE_HTML_INLINE:
  343. break;
  344. case CMARK_NODE_CUSTOM_INLINE:
  345. OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
  346. false, LITERAL);
  347. break;
  348. case CMARK_NODE_STRONG:
  349. if (entering) {
  350. LIT("\\textbf{");
  351. } else {
  352. LIT("}");
  353. }
  354. break;
  355. case CMARK_NODE_EMPH:
  356. if (entering) {
  357. LIT("\\emph{");
  358. } else {
  359. LIT("}");
  360. }
  361. break;
  362. case CMARK_NODE_LINK:
  363. if (entering) {
  364. const char *url = cmark_node_get_url(node);
  365. // requires \usepackage{hyperref}
  366. switch (get_link_type(node)) {
  367. case URL_AUTOLINK:
  368. LIT("\\url{");
  369. OUT(url, false, URL);
  370. LIT("}");
  371. return 0; // Don't process further nodes to avoid double-rendering artefacts
  372. case EMAIL_AUTOLINK:
  373. LIT("\\href{");
  374. OUT(url, false, URL);
  375. LIT("}\\nolinkurl{");
  376. break;
  377. case NORMAL_LINK:
  378. LIT("\\href{");
  379. OUT(url, false, URL);
  380. LIT("}{");
  381. break;
  382. case INTERNAL_LINK:
  383. LIT("\\protect\\hyperlink{");
  384. OUT(url + 1, false, URL);
  385. LIT("}{");
  386. break;
  387. case NO_LINK:
  388. LIT("{"); // error?
  389. }
  390. } else {
  391. LIT("}");
  392. }
  393. break;
  394. case CMARK_NODE_IMAGE:
  395. if (entering) {
  396. LIT("\\protect\\includegraphics{");
  397. // requires \include{graphicx}
  398. OUT(cmark_node_get_url(node), false, URL);
  399. LIT("}");
  400. return 0;
  401. }
  402. break;
  403. default:
  404. assert(false);
  405. break;
  406. }
  407. return 1;
  408. }
  409. char *cmark_render_latex(cmark_node *root, int options, int width) {
  410. return cmark_render(root, options, width, outc, S_render_node);
  411. }