| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
- #include "fitz-internal.h"
- #include "mupdf-internal.h"
- /*
- * CMap parser
- */
- enum
- {
- TOK_USECMAP = PDF_NUM_TOKENS,
- TOK_BEGIN_CODESPACE_RANGE,
- TOK_END_CODESPACE_RANGE,
- TOK_BEGIN_BF_CHAR,
- TOK_END_BF_CHAR,
- TOK_BEGIN_BF_RANGE,
- TOK_END_BF_RANGE,
- TOK_BEGIN_CID_CHAR,
- TOK_END_CID_CHAR,
- TOK_BEGIN_CID_RANGE,
- TOK_END_CID_RANGE,
- TOK_END_CMAP
- };
- static int
- pdf_cmap_token_from_keyword(char *key)
- {
- if (!strcmp(key, "usecmap")) return TOK_USECMAP;
- if (!strcmp(key, "begincodespacerange")) return TOK_BEGIN_CODESPACE_RANGE;
- if (!strcmp(key, "endcodespacerange")) return TOK_END_CODESPACE_RANGE;
- if (!strcmp(key, "beginbfchar")) return TOK_BEGIN_BF_CHAR;
- if (!strcmp(key, "endbfchar")) return TOK_END_BF_CHAR;
- if (!strcmp(key, "beginbfrange")) return TOK_BEGIN_BF_RANGE;
- if (!strcmp(key, "endbfrange")) return TOK_END_BF_RANGE;
- if (!strcmp(key, "begincidchar")) return TOK_BEGIN_CID_CHAR;
- if (!strcmp(key, "endcidchar")) return TOK_END_CID_CHAR;
- if (!strcmp(key, "begincidrange")) return TOK_BEGIN_CID_RANGE;
- if (!strcmp(key, "endcidrange")) return TOK_END_CID_RANGE;
- if (!strcmp(key, "endcmap")) return TOK_END_CMAP;
- return PDF_TOK_KEYWORD;
- }
/*
 * Pack the first len bytes of buf into an integer, most significant
 * byte first (big-endian).
 *
 * buf: bytes to read; each one is treated as unsigned.
 * len: number of bytes to consume; zero or negative yields 0.
 *
 * The value is accumulated in an unsigned int so that shifting a set
 * bit into (or past) the top bit is well defined rather than a signed
 * overflow. Bytes shifted out of the top of the accumulator are
 * discarded, so for long strings only the trailing bytes contribute.
 *
 * Returns the accumulated value converted to int.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int code = 0;

	while (len-- > 0)
		code = (code << 8) | *p++;

	return (int)code;
}
- static int
- pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok = pdf_lex(file, buf);
- if (tok == PDF_TOK_KEYWORD)
- tok = pdf_cmap_token_from_keyword(buf->scratch);
- return tok;
- }
- static void
- pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- tok = pdf_lex_cmap(file, buf);
- if (tok == PDF_TOK_NAME)
- fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name));
- else
- fz_warn(ctx, "expected name after CMapName in cmap");
- }
- static void
- pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- tok = pdf_lex_cmap(file, buf);
- if (tok == PDF_TOK_INT)
- pdf_set_cmap_wmode(ctx, cmap, buf->i);
- else
- fz_warn(ctx, "expected integer after WMode in cmap");
- }
- static void
- pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- int lo, hi;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == TOK_END_CODESPACE_RANGE)
- return;
- else if (tok == PDF_TOK_STRING)
- {
- lo = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- if (tok == PDF_TOK_STRING)
- {
- hi = pdf_code_from_string(buf->scratch, buf->len);
- pdf_add_codespace(ctx, cmap, lo, hi, buf->len);
- }
- else break;
- }
- else break;
- }
- fz_throw(ctx, "expected string or endcodespacerange");
- }
- static void
- pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- int lo, hi, dst;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == TOK_END_CID_RANGE)
- return;
- else if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string or endcidrange");
- lo = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- if (tok != PDF_TOK_INT)
- fz_throw(ctx, "expected integer");
- dst = buf->i;
- pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
- }
- }
- static void
- pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- int src, dst;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == TOK_END_CID_CHAR)
- return;
- else if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string or endcidchar");
- src = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- if (tok != PDF_TOK_INT)
- fz_throw(ctx, "expected integer");
- dst = buf->i;
- pdf_map_range_to_range(ctx, cmap, src, src, dst);
- }
- }
- static void
- pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf, int lo, int hi)
- {
- pdf_token tok;
- int dst[256];
- int i;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == PDF_TOK_CLOSE_ARRAY)
- return;
- /* Note: does not handle [ /Name /Name ... ] */
- else if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string or ]");
- if (buf->len / 2)
- {
- int len = fz_mini(buf->len / 2, nelem(dst));
- for (i = 0; i < len; i++)
- dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
- pdf_map_one_to_many(ctx, cmap, lo, dst, buf->len / 2);
- }
- lo ++;
- }
- }
- static void
- pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- int lo, hi, dst;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == TOK_END_BF_RANGE)
- return;
- else if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string or endbfrange");
- lo = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf->scratch, buf->len);
- if (lo < 0 || lo > 65535 || hi < 0 || hi > 65535 || lo > hi)
- {
- fz_warn(ctx, "bf_range limits out of range in cmap %s", cmap->cmap_name);
- return;
- }
- tok = pdf_lex_cmap(file, buf);
- if (tok == PDF_TOK_STRING)
- {
- if (buf->len == 2)
- {
- dst = pdf_code_from_string(buf->scratch, buf->len);
- pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
- }
- else
- {
- int dststr[256];
- int i;
- if (buf->len / 2)
- {
- int len = fz_mini(buf->len / 2, nelem(dststr));
- for (i = 0; i < len; i++)
- dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
- while (lo <= hi)
- {
- dststr[i-1] ++;
- pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
- lo ++;
- }
- }
- }
- }
- else if (tok == PDF_TOK_OPEN_ARRAY)
- {
- pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi);
- }
- else
- {
- fz_throw(ctx, "expected string or array or endbfrange");
- }
- }
- }
- static void
- pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- int dst[256];
- int src;
- int i;
- while (1)
- {
- tok = pdf_lex_cmap(file, buf);
- if (tok == TOK_END_BF_CHAR)
- return;
- else if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string or endbfchar");
- src = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, buf);
- /* Note: does not handle /dstName */
- if (tok != PDF_TOK_STRING)
- fz_throw(ctx, "expected string");
- if (buf->len / 2)
- {
- int len = fz_mini(buf->len / 2, nelem(dst));
- for (i = 0; i < len; i++)
- dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
- pdf_map_one_to_many(ctx, cmap, src, dst, i);
- }
- }
- }
- pdf_cmap *
- pdf_load_cmap(fz_context *ctx, fz_stream *file)
- {
- pdf_cmap *cmap;
- char key[64];
- pdf_lexbuf buf;
- pdf_token tok;
- const char *where;
- pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
- cmap = pdf_new_cmap(ctx);
- strcpy(key, ".notdef");
- fz_var(where);
- fz_try(ctx)
- {
- while (1)
- {
- where = "";
- tok = pdf_lex_cmap(file, &buf);
- if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
- break;
- else if (tok == PDF_TOK_NAME)
- {
- if (!strcmp(buf.scratch, "CMapName"))
- {
- where = " after CMapName";
- pdf_parse_cmap_name(ctx, cmap, file, &buf);
- }
- else if (!strcmp(buf.scratch, "WMode"))
- {
- where = " after WMode";
- pdf_parse_wmode(ctx, cmap, file, &buf);
- }
- else
- fz_strlcpy(key, buf.scratch, sizeof key);
- }
- else if (tok == TOK_USECMAP)
- {
- fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
- }
- else if (tok == TOK_BEGIN_CODESPACE_RANGE)
- {
- where = " codespacerange";
- pdf_parse_codespace_range(ctx, cmap, file, &buf);
- }
- else if (tok == TOK_BEGIN_BF_CHAR)
- {
- where = " bfchar";
- pdf_parse_bf_char(ctx, cmap, file, &buf);
- }
- else if (tok == TOK_BEGIN_CID_CHAR)
- {
- where = " cidchar";
- pdf_parse_cid_char(ctx, cmap, file, &buf);
- }
- else if (tok == TOK_BEGIN_BF_RANGE)
- {
- where = " bfrange";
- pdf_parse_bf_range(ctx, cmap, file, &buf);
- }
- else if (tok == TOK_BEGIN_CID_RANGE)
- {
- where = "cidrange";
- pdf_parse_cid_range(ctx, cmap, file, &buf);
- }
- /* ignore everything else */
- }
- pdf_sort_cmap(ctx, cmap);
- }
- fz_always(ctx)
- {
- pdf_lexbuf_fin(&buf);
- }
- fz_catch(ctx)
- {
- pdf_drop_cmap(ctx, cmap);
- fz_throw(ctx, "syntaxerror in cmap%s", where);
- }
- return cmap;
- }
|