/* base_string.c */
  #include "fitz-internal.h"

  #include <limits.h>
  /*
   * Split the next token off the string at *stringp, in the manner of
   * strsep(3).
   *
   * The token begins at *stringp and ends at the first byte that also occurs
   * in delim. That byte is overwritten with NUL and *stringp is moved to the
   * byte following it. When no delimiter byte is found, the remainder of the
   * string is the token and *stringp is set to NULL.
   *
   * Returns the start of the token, or NULL if *stringp was already NULL.
   * Consecutive delimiters produce empty tokens.
   */
  char *
  fz_strsep(char **stringp, const char *delim)
  {
      char *token = *stringp;
      char *sep;

      if (token == NULL)
          return NULL;

      sep = strpbrk(token, delim);
      if (sep == NULL) {
          /* Last token: nothing left to scan on the next call. */
          *stringp = NULL;
      } else {
          *sep = '\0';
          *stringp = sep + 1;
      }
      return token;
  }
  /*
   * Copy src into the buffer dst of total size siz bytes, truncating if
   * necessary, in the manner of strlcpy(3).
   *
   * At most siz-1 bytes are copied and dst is always NUL-terminated when
   * siz > 0. When siz <= 0 nothing is written to dst at all; a negative size
   * is treated as "no room" rather than as a huge buffer.
   *
   * Returns the length of src (not counting its NUL), so the copy was
   * truncated exactly when the return value is >= siz.
   */
  int
  fz_strlcpy(char *dst, const char *src, int siz)
  {
      const char *s = src;

      if (siz > 0) {
          char *d = dst;
          int room = siz - 1; /* payload bytes; one byte is reserved for NUL */

          while (room > 0 && *s != '\0') {
              *d++ = *s++;
              room--;
          }
          *d = '\0';
      }

      /* Walk whatever did not fit so the full source length can be reported. */
      while (*s != '\0')
          s++;

      return (int)(s - src);
  }
  /*
   * Append src to the string in the buffer dst of total size siz bytes,
   * truncating if necessary, in the manner of strlcat(3).
   *
   * dst is only examined within its first siz bytes. If no NUL is found
   * there, dst is left untouched and the function returns siz + strlen(src).
   * Otherwise as much of src as fits is appended and the result is
   * NUL-terminated. A siz <= 0 is treated as an empty buffer: nothing is read
   * from or written to dst.
   *
   * Returns the length the combined string would have had without
   * truncation, so truncation occurred exactly when the return value is
   * >= siz.
   */
  int
  fz_strlcat(char *dst, const char *src, int siz)
  {
      const char *s = src;
      char *d;
      int cap = siz > 0 ? siz : 0;
      int dlen = 0;
      int room;

      /*
       * Find the end of dst. The bound is tested before the byte so that
       * dst[cap] is never read when dst is not terminated within the buffer.
       */
      while (dlen < cap && dst[dlen] != '\0')
          dlen++;

      if (dlen == cap)
          return dlen + (int)strlen(src);

      d = dst + dlen;
      room = cap - dlen - 1; /* payload bytes; one byte is reserved for NUL */

      /* Copy what fits, but keep walking src to measure its full length. */
      while (*s != '\0') {
          if (room > 0) {
              *d++ = *s;
              room--;
          }
          s++;
      }
      *d = '\0';

      return dlen + (int)(s - src);
  }
  /* Limits and sentinel values used by the UTF-8 rune conversion routines. */
  enum
  {
      UTFmax = 4,         /* most bytes needed to encode a single rune */
      Runesync = 0x80,    /* bytes below this are never part of a multi-byte sequence */
      Runeself = 0x80,    /* runes below this are encoded as a single identical byte */
      Runeerror = 0xFFFD, /* rune reported when decoding fails */
      Runemax = 0x10FFFF, /* largest rune value */
  };

  /*
   * Bit layout of UTF-8.
   *
   * BitN is the number of payload bits carried by the lead byte of an N-byte
   * sequence; Bitx is the payload of a continuation byte. TN / Tx are the
   * corresponding tag bits (the high bits that are not payload), and RuneN is
   * the largest value representable in an N-byte sequence.
   */
  enum
  {
      Bit1 = 7,
      Bitx = 6,
      Bit2 = 5,
      Bit3 = 4,
      Bit4 = 3,
      Bit5 = 2,

      T1 = 0xFF ^ ((1 << (Bit1 + 1)) - 1), /* 0000 0000 */
      Tx = 0xFF ^ ((1 << (Bitx + 1)) - 1), /* 1000 0000 */
      T2 = 0xFF ^ ((1 << (Bit2 + 1)) - 1), /* 1100 0000 */
      T3 = 0xFF ^ ((1 << (Bit3 + 1)) - 1), /* 1110 0000 */
      T4 = 0xFF ^ ((1 << (Bit4 + 1)) - 1), /* 1111 0000 */
      T5 = 0xFF ^ ((1 << (Bit5 + 1)) - 1), /* 1111 1000 */

      Rune1 = (1 << (Bit1 + 0 * Bitx)) - 1, /* 0000 0000 0111 1111 */
      Rune2 = (1 << (Bit2 + 1 * Bitx)) - 1, /* 0000 0111 1111 1111 */
      Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 1111 1111 1111 1111 */
      Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1, /* 0001 1111 1111 1111 1111 1111 */

      Maskx = (1 << Bitx) - 1, /* 0011 1111: payload bits of a continuation byte */
      Testx = 0xFF ^ Maskx,    /* 1100 0000: tag bits of a continuation byte */

      Bad = Runeerror,
  };
  87. int
  88. fz_chartorune(int *rune, char *str)
  89. {
  90. int c, c1, c2, c3;
  91. long l;
  92. /*
  93. * one character sequence
  94. * 00000-0007F => T1
  95. */
  96. c = *(unsigned char*)str;
  97. if(c < Tx) {
  98. *rune = c;
  99. return 1;
  100. }
  101. /*
  102. * two character sequence
  103. * 0080-07FF => T2 Tx
  104. */
  105. c1 = *(unsigned char*)(str+1) ^ Tx;
  106. if(c1 & Testx)
  107. goto bad;
  108. if(c < T3) {
  109. if(c < T2)
  110. goto bad;
  111. l = ((c << Bitx) | c1) & Rune2;
  112. if(l <= Rune1)
  113. goto bad;
  114. *rune = l;
  115. return 2;
  116. }
  117. /*
  118. * three character sequence
  119. * 0800-FFFF => T3 Tx Tx
  120. */
  121. c2 = *(unsigned char*)(str+2) ^ Tx;
  122. if(c2 & Testx)
  123. goto bad;
  124. if(c < T4) {
  125. l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
  126. if(l <= Rune2)
  127. goto bad;
  128. *rune = l;
  129. return 3;
  130. }
  131. /*
  132. * four character sequence (21-bit value)
  133. * 10000-1FFFFF => T4 Tx Tx Tx
  134. */
  135. c3 = *(unsigned char*)(str+3) ^ Tx;
  136. if (c3 & Testx)
  137. goto bad;
  138. if (c < T5) {
  139. l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
  140. if (l <= Rune3)
  141. goto bad;
  142. *rune = l;
  143. return 4;
  144. }
  145. /*
  146. * Support for 5-byte or longer UTF-8 would go here, but
  147. * since we don't have that, we'll just fall through to bad.
  148. */
  149. /*
  150. * bad decoding
  151. */
  152. bad:
  153. *rune = Bad;
  154. return 1;
  155. }
  156. int
  157. fz_runetochar(char *str, int rune)
  158. {
  159. /* Runes are signed, so convert to unsigned for range check. */
  160. unsigned long c = (unsigned long)rune;
  161. /*
  162. * one character sequence
  163. * 00000-0007F => 00-7F
  164. */
  165. if(c <= Rune1) {
  166. str[0] = c;
  167. return 1;
  168. }
  169. /*
  170. * two character sequence
  171. * 0080-07FF => T2 Tx
  172. */
  173. if(c <= Rune2) {
  174. str[0] = T2 | (c >> 1*Bitx);
  175. str[1] = Tx | (c & Maskx);
  176. return 2;
  177. }
  178. /*
  179. * If the Rune is out of range, convert it to the error rune.
  180. * Do this test here because the error rune encodes to three bytes.
  181. * Doing it earlier would duplicate work, since an out of range
  182. * Rune wouldn't have fit in one or two bytes.
  183. */
  184. if (c > Runemax)
  185. c = Runeerror;
  186. /*
  187. * three character sequence
  188. * 0800-FFFF => T3 Tx Tx
  189. */
  190. if (c <= Rune3) {
  191. str[0] = T3 | (c >> 2*Bitx);
  192. str[1] = Tx | ((c >> 1*Bitx) & Maskx);
  193. str[2] = Tx | (c & Maskx);
  194. return 3;
  195. }
  196. /*
  197. * four character sequence (21-bit value)
  198. * 10000-1FFFFF => T4 Tx Tx Tx
  199. */
  200. str[0] = T4 | (c >> 3*Bitx);
  201. str[1] = Tx | ((c >> 2*Bitx) & Maskx);
  202. str[2] = Tx | ((c >> 1*Bitx) & Maskx);
  203. str[3] = Tx | (c & Maskx);
  204. return 4;
  205. }
  206. int
  207. fz_runelen(int c)
  208. {
  209. char str[10];
  210. return fz_runetochar(str, c);
  211. }
  /*
   * Parse a floating point number from s and return it as a float.
   *
   * - A NULL s yields 0, matching fz_atoi.
   * - If strtod reports a range error, or the result is NaN, 1.0 is returned:
   *   a small, known value that will not cause a divide by zero downstream.
   * - Otherwise the value is limited to [-FLT_MAX, FLT_MAX] so that a number
   *   that is fine as a double does not become invalid when narrowed to
   *   float.
   *
   * Text after the number is ignored, and input with no number at all
   * yields 0 (strtod's behaviour).
   */
  float fz_atof(const char *s)
  {
      double value;

      if (s == NULL)
          return 0;

      /* errno must be cleared first: strtod only sets it, never resets it. */
      errno = 0;
      value = strtod(s, NULL);
      if (errno == ERANGE || isnan(value)) {
          /* Return 1.0, as it's a small known value that won't cause a divide by 0. */
          return 1.0f;
      }

      /* fz_clampd is a project helper; presumably clamps value to [min, max]. */
      value = fz_clampd(value, -FLT_MAX, FLT_MAX);
      return (float)value;
  }
  /*
   * Parse a decimal integer from s.
   *
   * Leading whitespace and an optional sign are accepted and any text after
   * the digits is ignored. A NULL s, or input with no digits, yields 0.
   * Values that do not fit in an int saturate to INT_MAX / INT_MIN; atoi
   * would have undefined behaviour for such input.
   */
  int fz_atoi(const char *s)
  {
      long value;

      if (s == NULL)
          return 0;

      /* strtol itself saturates at LONG_MAX/LONG_MIN; narrow that to int. */
      value = strtol(s, NULL, 10);
      if (value > INT_MAX)
          return INT_MAX;
      if (value < INT_MIN)
          return INT_MIN;
      return (int)value;
  }