buffer.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. #include <stdarg.h>
  2. #include <string.h>
  3. #include <assert.h>
  4. #include <string.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <stdint.h>
  8. #include <limits.h>
  9. #include "config.h"
  10. #include "cmark_ctype.h"
  11. #include "buffer.h"
  /* Used as default value for cmark_strbuf->ptr so that people can always
   * assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
   *
   * This is a one-byte array with static storage duration, so its single byte
   * starts out as 0. cmark_strbuf_init() points `ptr` at it while leaving
   * `asize` at 0, and cmark_strbuf_free() compares `ptr` against it so that
   * this placeholder is never passed to the allocator's free().
   */
  unsigned char cmark_strbuf__initbuf[1];
  /* MIN(x, y): the smaller of the two arguments.
   *
   * Every use of an argument is parenthesized so that arguments containing
   * low-precedence operators (e.g. `a | b`, `c ? d : e`) expand correctly.
   * Each argument may still be evaluated twice, so do not pass expressions
   * with side effects. Only defined when no MIN macro exists already. */
  #ifndef MIN
  #define MIN(x, y) (((x) < (y)) ? (x) : (y))
  #endif
  19. void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
  20. bufsize_t initial_size) {
  21. buf->mem = mem;
  22. buf->asize = 0;
  23. buf->size = 0;
  24. buf->ptr = cmark_strbuf__initbuf;
  25. if (initial_size > 0)
  26. cmark_strbuf_grow(buf, initial_size);
  27. }
  28. static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
  29. cmark_strbuf_grow(buf, buf->size + add);
  30. }
  31. void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
  32. assert(target_size > 0);
  33. if (target_size < buf->asize)
  34. return;
  35. if (target_size > (bufsize_t)(INT32_MAX / 2)) {
  36. fprintf(stderr,
  37. "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
  38. (INT32_MAX / 2));
  39. abort();
  40. }
  41. /* Oversize the buffer by 50% to guarantee amortized linear time
  42. * complexity on append operations. */
  43. bufsize_t new_size = target_size + target_size / 2;
  44. new_size += 1;
  45. new_size = (new_size + 7) & ~7;
  46. buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
  47. new_size);
  48. buf->asize = new_size;
  49. }
  50. bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
  51. void cmark_strbuf_free(cmark_strbuf *buf) {
  52. if (!buf)
  53. return;
  54. if (buf->ptr != cmark_strbuf__initbuf)
  55. buf->mem->free(buf->ptr);
  56. cmark_strbuf_init(buf->mem, buf, 0);
  57. }
  58. void cmark_strbuf_clear(cmark_strbuf *buf) {
  59. buf->size = 0;
  60. if (buf->asize > 0)
  61. buf->ptr[0] = '\0';
  62. }
  63. void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
  64. bufsize_t len) {
  65. if (len <= 0 || data == NULL) {
  66. cmark_strbuf_clear(buf);
  67. } else {
  68. if (data != buf->ptr) {
  69. if (len >= buf->asize)
  70. cmark_strbuf_grow(buf, len);
  71. memmove(buf->ptr, data, len);
  72. }
  73. buf->size = len;
  74. buf->ptr[buf->size] = '\0';
  75. }
  76. }
  77. void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
  78. cmark_strbuf_set(buf, (const unsigned char *)string,
  79. string ? strlen(string) : 0);
  80. }
  81. void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
  82. S_strbuf_grow_by(buf, 1);
  83. buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
  84. buf->ptr[buf->size] = '\0';
  85. }
  86. void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
  87. bufsize_t len) {
  88. if (len <= 0)
  89. return;
  90. S_strbuf_grow_by(buf, len);
  91. memmove(buf->ptr + buf->size, data, len);
  92. buf->size += len;
  93. buf->ptr[buf->size] = '\0';
  94. }
  95. void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
  96. cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
  97. }
  98. void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
  99. const cmark_strbuf *buf) {
  100. bufsize_t copylen;
  101. assert(buf);
  102. if (!data || datasize <= 0)
  103. return;
  104. data[0] = '\0';
  105. if (buf->size == 0 || buf->asize <= 0)
  106. return;
  107. copylen = buf->size;
  108. if (copylen > datasize - 1)
  109. copylen = datasize - 1;
  110. memmove(data, buf->ptr, copylen);
  111. data[copylen] = '\0';
  112. }
  113. void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) {
  114. cmark_strbuf t = *buf_a;
  115. *buf_a = *buf_b;
  116. *buf_b = t;
  117. }
  118. unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
  119. unsigned char *data = buf->ptr;
  120. if (buf->asize == 0) {
  121. /* return an empty string */
  122. return (unsigned char *)buf->mem->calloc(1, 1);
  123. }
  124. cmark_strbuf_init(buf->mem, buf, 0);
  125. return data;
  126. }
  127. int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) {
  128. int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
  129. return (result != 0) ? result
  130. : (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
  131. }
  132. bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  133. if (pos >= buf->size)
  134. return -1;
  135. if (pos < 0)
  136. pos = 0;
  137. const unsigned char *p =
  138. (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
  139. if (!p)
  140. return -1;
  141. return (bufsize_t)(p - (const unsigned char *)buf->ptr);
  142. }
  143. bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  144. if (pos < 0 || buf->size == 0)
  145. return -1;
  146. if (pos >= buf->size)
  147. pos = buf->size - 1;
  148. bufsize_t i;
  149. for (i = pos; i >= 0; i--) {
  150. if (buf->ptr[i] == (unsigned char)c)
  151. return i;
  152. }
  153. return -1;
  154. }
  155. void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
  156. if (len < 0)
  157. len = 0;
  158. if (len < buf->size) {
  159. buf->size = len;
  160. buf->ptr[buf->size] = '\0';
  161. }
  162. }
  163. void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
  164. if (n > 0) {
  165. if (n > buf->size)
  166. n = buf->size;
  167. buf->size = buf->size - n;
  168. if (buf->size)
  169. memmove(buf->ptr, buf->ptr + n, buf->size);
  170. buf->ptr[buf->size] = '\0';
  171. }
  172. }
  173. void cmark_strbuf_rtrim(cmark_strbuf *buf) {
  174. if (!buf->size)
  175. return;
  176. while (buf->size > 0) {
  177. if (!cmark_isspace(buf->ptr[buf->size - 1]))
  178. break;
  179. buf->size--;
  180. }
  181. buf->ptr[buf->size] = '\0';
  182. }
  183. void cmark_strbuf_trim(cmark_strbuf *buf) {
  184. bufsize_t i = 0;
  185. if (!buf->size)
  186. return;
  187. while (i < buf->size && cmark_isspace(buf->ptr[i]))
  188. i++;
  189. cmark_strbuf_drop(buf, i);
  190. cmark_strbuf_rtrim(buf);
  191. }
  192. // Destructively modify string, collapsing consecutive
  193. // space and newline characters into a single space.
  194. void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
  195. bool last_char_was_space = false;
  196. bufsize_t r, w;
  197. for (r = 0, w = 0; r < s->size; ++r) {
  198. if (cmark_isspace(s->ptr[r])) {
  199. if (!last_char_was_space) {
  200. s->ptr[w++] = ' ';
  201. last_char_was_space = true;
  202. }
  203. } else {
  204. s->ptr[w++] = s->ptr[r];
  205. last_char_was_space = false;
  206. }
  207. }
  208. cmark_strbuf_truncate(s, w);
  209. }
  210. // Destructively unescape a string: remove backslashes before punctuation chars.
  211. extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
  212. bufsize_t r, w;
  213. for (r = 0, w = 0; r < buf->size; ++r) {
  214. if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
  215. r++;
  216. buf->ptr[w++] = buf->ptr[r];
  217. }
  218. cmark_strbuf_truncate(buf, w);
  219. }