| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334 |
- #include "fitz-internal.h"
- #include "mupdf-internal.h"
- /* #define DEBUG_LINEARIZATION */
- /* #define DEBUG_HEAP_SORT */
- /* #define DEBUG_WRITING */
- typedef struct pdf_write_options_s pdf_write_options;
/*
	As part of linearization we need to know which objects are used by
	which page. The objects used by one page are recorded in a
	page_objects structure, and the page_objects_list structure holds
	one page_objects per page.

	Object numbers are appended to the object array as they are found.
	At the end the array is heapsorted and deduplicated, for a total
	worst case of n log n time.

	The heap invariant used while sorting is:
		entry[n] >= entry[(n+1)*2-1] && entry[n] >= entry[(n+1)*2]
	or equivalently:
		entry[(n-1)>>1] >= entry[n]

	For a discussion of the heap data structure (and heapsort) see
	Kingston, "Algorithms and Data Structures".
*/
typedef struct {
	int num_shared;		/* number of shared objects (as reported by the debug dump) */
	int page_object_number;	/* object number of the page object itself */
	int num_objects;	/* number of objects (as reported by the debug dump) */
	int min_ofs, max_ofs;	/* byte range (as reported by the debug dump) */

	/* Extensible list of objects used on this page */
	int cap, len;		/* allocated / used entries of object[] */
	int object[1];		/* allocated with (cap-1) extra trailing ints */
} page_objects;
- typedef struct {
- int cap;
- int len;
- page_objects *page[1];
- } page_objects_list;
- struct pdf_write_options_s
- {
- FILE *out;
- int do_ascii;
- int do_expand;
- int do_garbage;
- int do_linear;
- int *use_list;
- int *ofs_list;
- int *gen_list;
- int *renumber_map;
- int continue_on_error;
- int *errors;
- /* The following extras are required for linearization */
- int *rev_renumber_map;
- int *rev_gen_list;
- int start;
- int first_xref_offset;
- int main_xref_offset;
- int first_xref_entry_offset;
- int file_len;
- int hints_shared_offset;
- int hintstream_len;
- pdf_obj *linear_l;
- pdf_obj *linear_h0;
- pdf_obj *linear_h1;
- pdf_obj *linear_o;
- pdf_obj *linear_e;
- pdf_obj *linear_n;
- pdf_obj *linear_t;
- pdf_obj *hints_s;
- pdf_obj *hints_length;
- int page_count;
- page_objects_list *page_object_lists;
- };
/*
 * Constants for use with use_list.
 *
 * If use_list[num] = 0, then object num is unused.
 * If use_list[num] & PARAMS, then object num is the linearization params obj.
 * If use_list[num] & HINTS, then object num is the primary hint stream.
 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
 * If use_list[num] & PAGE1, then object num is used by page 1.
 * If use_list[num] & SHARED, then object num is shared between pages.
 * If use_list[num] & PAGE_OBJECT, then this must be the first object in a page.
 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
 */
enum
{
	/* The low USE_PAGE_SHIFT bits hold flags; the bits above hold a page number. */
	USE_PAGE_SHIFT = 7,
	USE_PAGE_MASK = ~((1 << USE_PAGE_SHIFT) - 1),

	USE_CATALOGUE = 1 << 1,
	USE_PAGE1 = 1 << 2,
	USE_SHARED = 1 << 3,
	USE_PARAMS = 1 << 4,
	USE_HINTS = 1 << 5,
	USE_PAGE_OBJECT = 1 << 6
};
- /*
- * page_objects and page_object_list handling functions
- */
- static page_objects_list *
- page_objects_list_create(fz_context *ctx)
- {
- page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
- pol->cap = 1;
- pol->len = 0;
- return pol;
- }
- static void
- page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
- {
- int i;
- if (!pol)
- return;
- for (i = 0; i < pol->len; i++)
- {
- fz_free(ctx, pol->page[i]);
- }
- fz_free(ctx, pol);
- }
- static void
- page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
- {
- int oldcap = (*pol)->cap;
- if (newcap <= oldcap)
- return;
- *pol = fz_resize_array(ctx, *pol, 1, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
- memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
- (*pol)->cap = newcap;
- }
- static page_objects *
- page_objects_create(fz_context *ctx)
- {
- int initial_cap = 8;
- page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
- po->cap = initial_cap;
- po->len = 0;
- return po;
- }
- static void
- page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
- {
- page_objects *po;
- /* Make a page_objects if we don't have one */
- if (*ppo == NULL)
- *ppo = page_objects_create(ctx);
- po = *ppo;
- /* page_objects insertion: extend the page_objects by 1, and put us on the end */
- if (po->len == po->cap)
- {
- po = fz_resize_array(ctx, po, 1, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
- po->cap *= 2;
- *ppo = po;
- }
- po->object[po->len++] = i;
- }
- static void
- page_objects_list_insert(fz_context *ctx, pdf_write_options *opts, int page, int object)
- {
- page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
- if (opts->page_object_lists->len < page+1)
- opts->page_object_lists->len = page+1;
- page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
- }
- static void
- page_objects_list_set_page_object(fz_context *ctx, pdf_write_options *opts, int page, int object)
- {
- page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
- opts->page_object_lists->page[page]->page_object_number = object;
- }
- static void
- page_objects_sort(fz_context *ctx, page_objects *po)
- {
- int i, j;
- int n = po->len;
- /* Step 1: Make a heap */
- /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
- for (i = 1; i < n; i++)
- {
- /* Now bubble backwards to maintain heap invariant */
- j = i;
- while (j != 0)
- {
- int tmp;
- int k = (j-1)>>1;
- if (po->object[k] >= po->object[j])
- break;
- tmp = po->object[k];
- po->object[k] = po->object[j];
- po->object[j] = tmp;
- j = k;
- }
- }
- /* Step 2: Heap sort */
- /* Invariant: valid heap in [0..i), sorted list in [i..n) */
- /* Initially: i = n */
- for (i = n-1; i > 0; i--)
- {
- /* Swap the maximum (0th) element from the page_objects into its place
- * in the sorted list (position i). */
- int tmp = po->object[0];
- po->object[0] = po->object[i];
- po->object[i] = tmp;
- /* Now, the page_objects is invalid because the 0th element is out
- * of place. Bubble it until the page_objects is valid. */
- j = 0;
- while (1)
- {
- /* Children are k and k+1 */
- int k = (j+1)*2-1;
- /* If both children out of the page_objects, we're done */
- if (k > i-1)
- break;
- /* If both are in the page_objects, pick the larger one */
- if (k < i-1 && po->object[k] < po->object[k+1])
- k++;
- /* If j is bigger than k (i.e. both of it's children),
- * we're done */
- if (po->object[j] > po->object[k])
- break;
- tmp = po->object[k];
- po->object[k] = po->object[j];
- po->object[j] = tmp;
- j = k;
- }
- }
- }
- static int
- order_ge(int ui, int uj)
- {
- /*
- For linearization, we need to order the sections as follows:
- Remaining pages
- Shared objects
- Objects not associated with any page
- (Linearization params)
- Catalogue (and other document level objects)
- First page
- (Primary Hint stream) (*)
- Any free objects
- Note, this is NOT the same order they appear in
- the final file!
- The PDF reference gives us the option of putting the hint stream
- after the first page, and we take it, for simplicity.
- */
- /* If the 2 objects are in the same section, then page object comes first. */
- if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
- return ((ui & USE_PAGE_OBJECT) == 0);
- /* Put unused objects last */
- else if (ui == 0)
- return 1;
- else if (uj == 0)
- return 0;
- /* Put the hint stream before that... */
- else if (ui & USE_HINTS)
- return 1;
- else if (uj & USE_HINTS)
- return 0;
- /* Put page 1 before that... */
- else if (ui & USE_PAGE1)
- return 1;
- else if (uj & USE_PAGE1)
- return 0;
- /* Put the calagoue before that... */
- else if (ui & USE_CATALOGUE)
- return 1;
- else if (uj & USE_CATALOGUE)
- return 0;
- /* Put the linearization params before that... */
- else if (ui & USE_PARAMS)
- return 1;
- else if (uj & USE_PARAMS)
- return 0;
- /* Put objects not associated with any page (anything
- * not touched by the catalogue) before that... */
- else if (ui == 0)
- return 1;
- else if (uj == 0)
- return 0;
- /* Put shared objects before that... */
- else if (ui & USE_SHARED)
- return 1;
- else if (uj & USE_SHARED)
- return 0;
- /* And otherwise, order by the page number on which
- * they are used. */
- return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
- }
/*
	Heapsort the n indices in list, in place. Two entries a and b are
	compared as ge(val[a], val[b]), so list is ordered by the values it
	indexes in val; val itself is not modified. With DEBUG_HEAP_SORT
	defined the list is dumped to stderr before, during and after.
*/
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int end, pos, t;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (pos = 0; pos < n; pos++)
		fprintf(stderr, "%d: %d %x\n", pos, list[pos], val[list[pos]]);
#endif

	/* Phase 1: grow a heap over list[0..end) one entry at a time,
	 * sifting each new entry up towards the root. */
	for (end = 1; end < n; end++)
	{
		for (pos = end; pos != 0; )
		{
			int parent = (pos-1)>>1;

			if (ge(val[list[parent]], val[list[pos]]))
				break;
			t = list[parent];
			list[parent] = list[pos];
			list[pos] = t;
			pos = parent;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (pos = 0; pos < n; pos++)
	{
		int child = (pos+1)*2-1;

		fprintf(stderr, "%d: %d %x ", pos, list[pos], val[list[pos]]);
		if (child < n)
			fprintf(stderr, "%s", ge(val[list[pos]], val[list[child]]) ? "OK " : "BAD ");
		if (child+1 < n)
			fprintf(stderr, "%s", ge(val[list[pos]], val[list[child+1]]) ? "OK\n" : "BAD\n");
		else
			fprintf(stderr, "\n");
	}
#endif

	/* Phase 2: list[0..end] is a heap and list(end..n) is sorted. Move
	 * the top of the heap to position end, then repair list[0..end). */
	for (end = n-1; end > 0; end--)
	{
		t = list[0];
		list[0] = list[end];
		list[end] = t;

		/* The new root may be out of place; sift it down */
		pos = 0;
		for (;;)
		{
			/* Children of pos are child and child+1 */
			int child = (pos+1)*2-1;

			/* No children left inside the heap */
			if (child > end-1)
				break;
			/* Two children inside the heap: take the greater one */
			if (child < end-1 && ge(val[list[child+1]], val[list[child]]))
				child++;
			/* Not below its greatest child: heap is valid again */
			if (ge(val[list[pos]], val[list[child]]))
				break;
			t = list[child];
			list[child] = list[pos];
			list[pos] = t;
			pos = child;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (pos = 0; pos < n; pos++)
	{
		fprintf(stderr, "%d: %d %x ", pos, list[pos], val[list[pos]]);
		if (pos+1 < n)
			fprintf(stderr, "%s", ge(val[list[pos+1]], val[list[pos]]) ? "OK" : "BAD");
		fprintf(stderr, "\n");
	}
#endif
}
- static void
- page_objects_dedupe(fz_context *ctx, page_objects *po)
- {
- int i, j;
- int n = po->len-1;
- for (i = 0; i < n; i++)
- {
- if (po->object[i] == po->object[i+1])
- break;
- }
- j = i; /* j points to the last valid one */
- i++; /* i points to the first one we haven't looked at */
- for (; i < n; i++)
- {
- if (po->object[j] != po->object[i])
- po->object[++j] = po->object[i];
- }
- po->len = j+1;
- }
- static void
- page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
- {
- int i;
- int n = pol->len;
- for (i = 0; i < n; i++)
- {
- page_objects_sort(ctx, pol->page[i]);
- page_objects_dedupe(ctx, pol->page[i]);
- }
- }
#ifdef DEBUG_LINEARIZATION
/*
	Debugging aid: print the objects recorded for every page, together
	with the per-page summary fields, to stderr.
*/
static void
page_objects_dump(pdf_write_options *opts)
{
	page_objects_list *list = opts->page_object_lists;
	int pageno;

	for (pageno = 0; pageno < list->len; pageno++)
	{
		page_objects *po = list->page[pageno];
		int k;

		/* Pages are reported one based */
		fprintf(stderr, "Page %d\n", pageno+1);
		for (k = 0; k < po->len; k++)
		{
			int objnum = po->object[k];

			fprintf(stderr, "\tObject %d: use=%x\n", objnum, opts->use_list[objnum]);
		}
		fprintf(stderr, "Byte range=%d->%d\n", po->min_ofs, po->max_ofs);
		fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", po->num_objects, po->num_shared);
		fprintf(stderr, "Page object number=%d\n", po->page_object_number);
	}
}

/*
	Debugging aid: print the use flags and offset of every object in the
	xref to stderr.
*/
static void
objects_dump(pdf_document *xref, pdf_write_options *opts)
{
	int objnum = 0;

	while (objnum < xref->len)
	{
		fprintf(stderr, "Object %d use=%x offset=%d\n", objnum, opts->use_list[objnum], opts->ofs_list[objnum]);
		objnum++;
	}
}
#endif
- /*
- * Garbage collect objects not reachable from the trailer.
- */
- static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
- {
- int num = pdf_to_num(obj);
- int gen = pdf_to_gen(obj);
- fz_context *ctx = xref->ctx;
- if (num < 0 || num >= xref->len)
- return NULL;
- if (opts->use_list[num])
- return NULL;
- opts->use_list[num] = 1;
- /* Bake in /Length in stream objects */
- fz_try(ctx)
- {
- if (pdf_is_stream(xref, num, gen))
- {
- pdf_obj *len = pdf_dict_gets(obj, "Length");
- if (pdf_is_indirect(len))
- {
- opts->use_list[pdf_to_num(len)] = 0;
- len = pdf_resolve_indirect(len);
- pdf_dict_puts(obj, "Length", len);
- }
- }
- }
- fz_catch(ctx)
- {
- /* Leave broken */
- }
- return pdf_resolve_indirect(obj);
- }
- static void sweepobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
- {
- int i;
- if (pdf_is_indirect(obj))
- obj = sweepref(xref, opts, obj);
- if (pdf_is_dict(obj))
- {
- int n = pdf_dict_len(obj);
- for (i = 0; i < n; i++)
- sweepobj(xref, opts, pdf_dict_get_val(obj, i));
- }
- else if (pdf_is_array(obj))
- {
- int n = pdf_array_len(obj);
- for (i = 0; i < n; i++)
- sweepobj(xref, opts, pdf_array_get(obj, i));
- }
- }
- /*
- * Scan for and remove duplicate objects (slow)
- */
- static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
- {
- int num, other;
- fz_context *ctx = xref->ctx;
- for (num = 1; num < xref->len; num++)
- {
- /* Only compare an object to objects preceding it */
- for (other = 1; other < num; other++)
- {
- pdf_obj *a, *b;
- int differ, newnum, streama, streamb;
- if (num == other || !opts->use_list[num] || !opts->use_list[other])
- continue;
- /*
- * Comparing stream objects data contents would take too long.
- *
- * pdf_is_stream calls pdf_cache_object and ensures
- * that the xref table has the objects loaded.
- */
- fz_try(ctx)
- {
- streama = pdf_is_stream(xref, num, 0);
- streamb = pdf_is_stream(xref, other, 0);
- differ = streama || streamb;
- if (streama && streamb && opts->do_garbage >= 4)
- differ = 0;
- }
- fz_catch(ctx)
- {
- /* Assume different */
- differ = 1;
- }
- if (differ)
- continue;
- a = xref->table[num].obj;
- b = xref->table[other].obj;
- a = pdf_resolve_indirect(a);
- b = pdf_resolve_indirect(b);
- if (pdf_objcmp(a, b))
- continue;
- if (streama && streamb)
- {
- /* Check to see if streams match too. */
- fz_buffer *sa = NULL;
- fz_buffer *sb = NULL;
- fz_var(sa);
- fz_var(sb);
- differ = 1;
- fz_try(ctx)
- {
- unsigned char *dataa, *datab;
- int lena, lenb;
- sa = pdf_load_raw_renumbered_stream(xref, num, 0, num, 0);
- sb = pdf_load_raw_renumbered_stream(xref, other, 0, other, 0);
- lena = fz_buffer_storage(ctx, sa, &dataa);
- lenb = fz_buffer_storage(ctx, sb, &datab);
- if (lena == lenb && memcmp(dataa, datab, lena) == 0)
- differ = 0;
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, sa);
- fz_drop_buffer(ctx, sb);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- if (differ)
- continue;
- }
- /* Keep the lowest numbered object */
- newnum = fz_mini(num, other);
- opts->renumber_map[num] = newnum;
- opts->renumber_map[other] = newnum;
- opts->rev_renumber_map[newnum] = num; /* Either will do */
- opts->use_list[fz_maxi(num, other)] = 0;
- /* One duplicate was found, do not look for another */
- break;
- }
- }
- }
- /*
- * Renumber objects sequentially so the xref is more compact
- *
- * This code assumes that any opts->renumber_map[n] <= n for all n.
- */
- static void compactxref(pdf_document *xref, pdf_write_options *opts)
- {
- int num, newnum;
- /*
- * Update renumber_map in-place, clustering all used
- * objects together at low object ids. Objects that
- * already should be renumbered will have their new
- * object ids be updated to reflect the compaction.
- */
- newnum = 1;
- for (num = 1; num < xref->len; num++)
- {
- /* If it's not used, map it to zero */
- if (!opts->use_list[opts->renumber_map[num]])
- {
- opts->renumber_map[num] = 0;
- }
- /* If it's not moved, compact it. */
- else if (opts->renumber_map[num] == num)
- {
- opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
- opts->rev_gen_list[newnum] = opts->rev_gen_list[num];
- opts->renumber_map[num] = newnum++;
- }
- /* Otherwise it's used, and moved. We know that it must have
- * moved down, so the place it's moved to will be in the right
- * place already. */
- else
- {
- opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
- }
- }
- }
- /*
- * Update indirect objects according to renumbering established when
- * removing duplicate objects and compacting the xref.
- */
- static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
- {
- int i;
- fz_context *ctx = xref->ctx;
- if (pdf_is_dict(obj))
- {
- int n = pdf_dict_len(obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *key = pdf_dict_get_key(obj, i);
- pdf_obj *val = pdf_dict_get_val(obj, i);
- if (pdf_is_indirect(val))
- {
- val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref);
- pdf_dict_put(obj, key, val);
- pdf_drop_obj(val);
- }
- else
- {
- renumberobj(xref, opts, val);
- }
- }
- }
- else if (pdf_is_array(obj))
- {
- int n = pdf_array_len(obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *val = pdf_array_get(obj, i);
- if (pdf_is_indirect(val))
- {
- val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref);
- pdf_array_put(obj, i, val);
- pdf_drop_obj(val);
- }
- else
- {
- renumberobj(xref, opts, val);
- }
- }
- }
- }
- static void renumberobjs(pdf_document *xref, pdf_write_options *opts)
- {
- pdf_xref_entry *oldxref;
- int newlen;
- int num;
- fz_context *ctx = xref->ctx;
- int *new_use_list;
- new_use_list = fz_calloc(ctx, xref->len+3, sizeof(int));
- fz_try(ctx)
- {
- /* Apply renumber map to indirect references in all objects in xref */
- renumberobj(xref, opts, xref->trailer);
- for (num = 0; num < xref->len; num++)
- {
- pdf_obj *obj = xref->table[num].obj;
- if (pdf_is_indirect(obj))
- {
- obj = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(obj)], 0, xref);
- pdf_update_object(xref, num, obj);
- pdf_drop_obj(obj);
- }
- else
- {
- renumberobj(xref, opts, obj);
- }
- }
- /* Create new table for the reordered, compacted xref */
- oldxref = xref->table;
- xref->table = fz_malloc_array(ctx, xref->len + 3, sizeof(pdf_xref_entry));
- xref->table[0] = oldxref[0];
- /* Move used objects into the new compacted xref */
- newlen = 0;
- for (num = 1; num < xref->len; num++)
- {
- if (opts->use_list[num])
- {
- if (newlen < opts->renumber_map[num])
- newlen = opts->renumber_map[num];
- xref->table[opts->renumber_map[num]] = oldxref[num];
- new_use_list[opts->renumber_map[num]] = opts->use_list[num];
- }
- else
- {
- pdf_drop_obj(oldxref[num].obj);
- }
- }
- }
- fz_catch(ctx)
- {
- fz_free(ctx, new_use_list);
- fz_rethrow(ctx);
- }
- fz_free(ctx, oldxref);
- fz_free(ctx, opts->use_list);
- opts->use_list = new_use_list;
- /* Update the used objects count in compacted xref */
- xref->len = newlen + 1;
- for (num = 1; num < xref->len; num++)
- {
- opts->renumber_map[num] = num;
- }
- }
- static void page_objects_list_renumber(pdf_write_options *opts)
- {
- int i, j;
- for (i = 0; i < opts->page_object_lists->len; i++)
- {
- page_objects *po = opts->page_object_lists->page[i];
- for (j = 0; j < po->len; j++)
- {
- po->object[j] = opts->renumber_map[po->object[j]];
- }
- po->page_object_number = opts->renumber_map[po->page_object_number];
- }
- }
- static void
- mark_all(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int flag, int page)
- {
- fz_context *ctx = xref->ctx;
- if (pdf_obj_mark(val))
- return;
- fz_try(ctx)
- {
- if (pdf_is_indirect(val))
- {
- int num = pdf_to_num(val);
- if (flag >= 16 && (opts->use_list[num] & USE_PAGE_MASK))
- /* Already used */
- opts->use_list[num] |= USE_SHARED;
- else
- opts->use_list[num] |= flag;
- if (page >= 0)
- page_objects_list_insert(ctx, opts, page, num);
- }
- if (pdf_is_dict(val))
- {
- int i, n = pdf_dict_len(val);
- for (i = 0; i < n; i++)
- {
- mark_all(xref, opts, pdf_dict_get_val(val, i), flag, page);
- }
- }
- else if (pdf_is_array(val))
- {
- int i, n = pdf_array_len(val);
- for (i = 0; i < n; i++)
- {
- mark_all(xref, opts, pdf_array_get(val, i), flag, page);
- }
- }
- }
- fz_always(ctx)
- {
- pdf_obj_unmark(val);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static int
- mark_pages(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int pagenum)
- {
- fz_context *ctx = xref->ctx;
- if (pdf_obj_mark(val))
- return pagenum;
- fz_try(ctx)
- {
- if (pdf_is_dict(val))
- {
- if (!strcmp("Page", pdf_to_name(pdf_dict_gets(val, "Type"))))
- {
- int num = pdf_to_num(val);
- pdf_obj_unmark(val);
- mark_all(xref, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
- page_objects_list_set_page_object(ctx, opts, pagenum, num);
- pagenum++;
- opts->use_list[num] |= USE_PAGE_OBJECT;
- }
- else
- {
- int i, n = pdf_dict_len(val);
- for (i = 0; i < n; i++)
- {
- pdf_obj *key = pdf_dict_get_key(val, i);
- pdf_obj *obj = pdf_dict_get_val(val, i);
- if (!strcmp("Kids", pdf_to_name(key)))
- pagenum = mark_pages(xref, opts, obj, pagenum);
- else
- mark_all(xref, opts, obj, USE_CATALOGUE, -1);
- }
- if (pdf_is_indirect(val))
- {
- int num = pdf_to_num(val);
- opts->use_list[num] |= USE_CATALOGUE;
- }
- }
- }
- else if (pdf_is_array(val))
- {
- int i, n = pdf_array_len(val);
- for (i = 0; i < n; i++)
- {
- pagenum = mark_pages(xref, opts, pdf_array_get(val, i), pagenum);
- }
- if (pdf_is_indirect(val))
- {
- int num = pdf_to_num(val);
- opts->use_list[num] |= USE_CATALOGUE;
- }
- }
- }
- fz_always(ctx)
- {
- pdf_obj_unmark(val);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return pagenum;
- }
- static void
- mark_root(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict)
- {
- fz_context *ctx = xref->ctx;
- int i, n = pdf_dict_len(dict);
- if (pdf_obj_mark(dict))
- return;
- fz_try(ctx)
- {
- if (pdf_is_indirect(dict))
- {
- int num = pdf_to_num(dict);
- opts->use_list[num] |= USE_CATALOGUE;
- }
- for (i = 0; i < n; i++)
- {
- char *key = pdf_to_name(pdf_dict_get_key(dict, i));
- pdf_obj *val = pdf_dict_get_val(dict, i);
- if (!strcmp("Pages", key))
- opts->page_count = mark_pages(xref, opts, val, 0);
- else if (!strcmp("Outlines", key))
- {
- /* FIXME: Look at PageMode to decide whether to
- * USE_OTHERPAGES or USE_PAGE1 here. */
- if (0 /* PageMode == "Outlines" */)
- mark_all(xref, opts, val, USE_PAGE1, -1);
- }
- else
- mark_all(xref, opts, val, USE_CATALOGUE, -1);
- }
- }
- fz_always(ctx)
- {
- pdf_obj_unmark(dict);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- mark_trailer(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict)
- {
- fz_context *ctx = xref->ctx;
- int i, n = pdf_dict_len(dict);
- if (pdf_obj_mark(dict))
- return;
- fz_try(ctx)
- {
- for (i = 0; i < n; i++)
- {
- char *key = pdf_to_name(pdf_dict_get_key(dict, i));
- pdf_obj *val = pdf_dict_get_val(dict, i);
- if (!strcmp("Root", key))
- mark_root(xref, opts, val);
- else
- mark_all(xref, opts, val, USE_CATALOGUE, -1);
- }
- }
- fz_always(ctx)
- {
- pdf_obj_unmark(dict);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- add_linearization_objs(pdf_document *xref, pdf_write_options *opts)
- {
- pdf_obj *params_obj = NULL;
- pdf_obj *params_ref = NULL;
- pdf_obj *hint_obj = NULL;
- pdf_obj *hint_ref = NULL;
- pdf_obj *o = NULL;
- int params_num, hint_num;
- fz_context *ctx = xref->ctx;
- fz_var(params_obj);
- fz_var(params_ref);
- fz_var(hint_obj);
- fz_var(hint_ref);
- fz_var(o);
- fz_try(ctx)
- {
- /* Linearization params */
- params_obj = pdf_new_dict(ctx, 10);
- params_ref = pdf_new_ref(xref, params_obj);
- params_num = pdf_to_num(params_ref);
- opts->use_list[params_num] = USE_PARAMS;
- opts->renumber_map[params_num] = params_num;
- opts->rev_renumber_map[params_num] = params_num;
- opts->gen_list[params_num] = 0;
- opts->rev_gen_list[params_num] = 0;
- pdf_dict_puts_drop(params_obj, "Linearized", pdf_new_real(ctx, 1.0));
- opts->linear_l = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(params_obj, "L", opts->linear_l);
- opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
- o = pdf_new_array(ctx, 2);
- pdf_array_push(o, opts->linear_h0);
- opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
- pdf_array_push(o, opts->linear_h1);
- pdf_dict_puts_drop(params_obj, "H", o);
- o = NULL;
- opts->linear_o = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(params_obj, "O", opts->linear_o);
- opts->linear_e = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(params_obj, "E", opts->linear_e);
- opts->linear_n = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(params_obj, "N", opts->linear_n);
- opts->linear_t = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(params_obj, "T", opts->linear_t);
- /* Primary hint stream */
- hint_obj = pdf_new_dict(ctx, 10);
- hint_ref = pdf_new_ref(xref, hint_obj);
- hint_num = pdf_to_num(hint_ref);
- opts->use_list[hint_num] = USE_HINTS;
- opts->renumber_map[hint_num] = hint_num;
- opts->rev_renumber_map[hint_num] = hint_num;
- opts->gen_list[hint_num] = 0;
- opts->rev_gen_list[hint_num] = 0;
- pdf_dict_puts_drop(hint_obj, "P", pdf_new_int(ctx, 0));
- opts->hints_s = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(hint_obj, "S", opts->hints_s);
- /* FIXME: Do we have thumbnails? Do a T entry */
- /* FIXME: Do we have outlines? Do an O entry */
- /* FIXME: Do we have article threads? Do an A entry */
- /* FIXME: Do we have named destinations? Do a E entry */
- /* FIXME: Do we have interactive forms? Do a V entry */
- /* FIXME: Do we have document information? Do an I entry */
- /* FIXME: Do we have logical structure heirarchy? Do a C entry */
- /* FIXME: Do L, Page Label hint table */
- pdf_dict_puts_drop(hint_obj, "Filter", pdf_new_name(ctx, "FlateDecode"));
- opts->hints_length = pdf_new_int(ctx, INT_MIN);
- pdf_dict_puts(hint_obj, "Length", opts->hints_length);
- xref->table[hint_num].stm_ofs = -1;
- }
- fz_always(ctx)
- {
- pdf_drop_obj(params_obj);
- pdf_drop_obj(params_ref);
- pdf_drop_obj(hint_ref);
- pdf_drop_obj(hint_obj);
- pdf_drop_obj(o);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, char *text)
- {
- pdf_obj *o, *r;
- int i, n;
- /* If the parent node doesn't have an entry of this type, give up. */
- o = pdf_dict_gets(dict, text);
- if (!o)
- return;
- /* If the resources dict we are building doesn't have an entry of this
- * type yet, then just copy it (ensuring it's not a reference) */
- r = pdf_dict_gets(res, text);
- if (r == NULL)
- {
- o = pdf_resolve_indirect(o);
- if (pdf_is_dict(o))
- o = pdf_copy_dict(ctx, o);
- else if (pdf_is_array(o))
- o = pdf_copy_array(ctx, o);
- else
- o = NULL;
- if (o)
- pdf_dict_puts(res, text, o);
- return;
- }
- /* Otherwise we need to merge o into r */
- if (pdf_is_dict(o))
- {
- n = pdf_dict_len(o);
- for (i = 0; i < n; i++)
- {
- pdf_obj *key = pdf_dict_get_key(o, i);
- pdf_obj *val = pdf_dict_get_val(o, i);
- if (pdf_dict_gets(res, pdf_to_name(key)))
- continue;
- pdf_dict_puts(res, pdf_to_name(key), val);
- }
- }
- }
- static void
- lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
- {
- while (1)
- {
- pdf_obj *o;
- node = pdf_dict_gets(node, "Parent");
- depth--;
- if (!node || depth < 0)
- break;
- o = pdf_dict_gets(node, "Resources");
- if (o)
- {
- lpr_inherit_res_contents(ctx, dict, o, "ExtGState");
- lpr_inherit_res_contents(ctx, dict, o, "ColorSpace");
- lpr_inherit_res_contents(ctx, dict, o, "Pattern");
- lpr_inherit_res_contents(ctx, dict, o, "Shading");
- lpr_inherit_res_contents(ctx, dict, o, "XObject");
- lpr_inherit_res_contents(ctx, dict, o, "Font");
- lpr_inherit_res_contents(ctx, dict, o, "ProcSet");
- lpr_inherit_res_contents(ctx, dict, o, "Properties");
- }
- }
- }
- static pdf_obj *
- lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
- {
- do
- {
- pdf_obj *o = pdf_dict_gets(node, text);
- if (o)
- return pdf_resolve_indirect(o);
- node = pdf_dict_gets(node, "Parent");
- depth--;
- }
- while (depth >= 0 && node);
- return NULL;
- }
- static int
- lpr(fz_context *ctx, pdf_obj *node, int depth, int page)
- {
- pdf_obj *kids;
- pdf_obj *o = NULL;
- int i, n;
- if (pdf_obj_mark(node))
- return page;
- fz_var(o);
- fz_try(ctx)
- {
- if (!strcmp("Page", pdf_to_name(pdf_dict_gets(node, "Type"))))
- {
- pdf_obj *r; /* r is deliberately not cleaned up */
- /* Copy resources down to the child */
- o = pdf_keep_obj(pdf_dict_gets(node, "Resources"));
- if (!o)
- {
- o = pdf_keep_obj(pdf_new_dict(ctx, 2));
- pdf_dict_puts(node, "Resources", o);
- }
- lpr_inherit_res(ctx, node, depth, o);
- r = lpr_inherit(ctx, node, "MediaBox", depth);
- if (r)
- pdf_dict_puts(node, "MediaBox", r);
- r = lpr_inherit(ctx, node, "CropBox", depth);
- if (r)
- pdf_dict_puts(node, "CropBox", r);
- r = lpr_inherit(ctx, node, "BleedBox", depth);
- if (r)
- pdf_dict_puts(node, "BleedBox", r);
- r = lpr_inherit(ctx, node, "TrimBox", depth);
- if (r)
- pdf_dict_puts(node, "TrimBox", r);
- r = lpr_inherit(ctx, node, "ArtBox", depth);
- if (r)
- pdf_dict_puts(node, "ArtBox", r);
- r = lpr_inherit(ctx, node, "Rotate", depth);
- if (r)
- pdf_dict_puts(node, "Rotate", r);
- page++;
- }
- else
- {
- kids = pdf_dict_gets(node, "Kids");
- n = pdf_array_len(kids);
- for(i = 0; i < n; i++)
- {
- page = lpr(ctx, pdf_array_get(kids, i), depth+1, page);
- }
- pdf_dict_dels(node, "Resources");
- pdf_dict_dels(node, "MediaBox");
- pdf_dict_dels(node, "CropBox");
- pdf_dict_dels(node, "BleedBox");
- pdf_dict_dels(node, "TrimBox");
- pdf_dict_dels(node, "ArtBox");
- pdf_dict_dels(node, "Rotate");
- }
- }
- fz_always(ctx)
- {
- pdf_drop_obj(o);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- pdf_obj_unmark(node);
- return page;
- }
- void
- pdf_localise_page_resources(pdf_document *xref)
- {
- fz_context *ctx = xref->ctx;
- if (xref->resources_localised)
- return;
- lpr(ctx, pdf_dict_getp(xref->trailer, "Root/Pages"), 0, 0);
- xref->resources_localised = 1;
- }
- static void
- linearize(pdf_document *xref, pdf_write_options *opts)
- {
- int i;
- int n = xref->len + 2;
- int *reorder;
- int *rev_renumber_map;
- int *rev_gen_list;
- fz_context *ctx = xref->ctx;
- opts->page_object_lists = page_objects_list_create(ctx);
- /* Ensure that every page has local references of its resources */
- /* FIXME: We could 'thin' the resources according to what is actually
- * required for each page, but this would require us to run the page
- * content streams. */
- pdf_localise_page_resources(xref);
- /* Walk the objects for each page, marking which ones are used, where */
- memset(opts->use_list, 0, n * sizeof(int));
- mark_trailer(xref, opts, xref->trailer);
- /* Add new objects required for linearization */
- add_linearization_objs(xref, opts);
- #ifdef DEBUG_WRITING
- fprintf(stderr, "Usage calculated:\n");
- for (i=0; i < xref->len; i++)
- {
- fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
- }
- #endif
- /* Allocate/init the structures used for renumbering the objects */
- reorder = fz_calloc(ctx, n, sizeof(int));
- rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
- rev_gen_list = fz_calloc(ctx, n, sizeof(int));
- for (i = 0; i < n; i++)
- {
- reorder[i] = i;
- }
- /* Heap sort the reordering */
- heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
- #ifdef DEBUG_WRITING
- fprintf(stderr, "Reordered:\n");
- for (i=1; i < xref->len; i++)
- {
- fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
- }
- #endif
- /* Find the split point */
- for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++);
- opts->start = i;
- /* Roll the reordering into the renumber_map */
- for (i = 0; i < n; i++)
- {
- opts->renumber_map[reorder[i]] = i;
- rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
- rev_gen_list[i] = opts->rev_gen_list[reorder[i]];
- }
- fz_free(ctx, opts->rev_renumber_map);
- fz_free(ctx, opts->rev_gen_list);
- opts->rev_renumber_map = rev_renumber_map;
- opts->rev_gen_list = rev_gen_list;
- fz_free(ctx, reorder);
- /* Apply the renumber_map */
- page_objects_list_renumber(opts);
- renumberobjs(xref, opts);
- page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
- }
- static void
- update_linearization_params(pdf_document *xref, pdf_write_options *opts)
- {
- int offset;
- pdf_set_int(opts->linear_l, opts->file_len);
- /* Primary hint stream offset (of object, not stream!) */
- pdf_set_int(opts->linear_h0, opts->ofs_list[xref->len-1]);
- /* Primary hint stream length (of object, not stream!) */
- offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
- pdf_set_int(opts->linear_h1, offset - opts->ofs_list[xref->len-1]);
- /* Object number of first pages page object (the first object of page 0) */
- pdf_set_int(opts->linear_o, opts->page_object_lists->page[0]->object[0]);
- /* Offset of end of first page (first page is followed by primary
- * hint stream (object n-1) then remaining pages (object 1...). The
- * primary hint stream counts as part of the first pages data, I think.
- */
- offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
- pdf_set_int(opts->linear_e, offset);
- /* Number of pages in document */
- pdf_set_int(opts->linear_n, opts->page_count);
- /* Offset of first entry in main xref table */
- pdf_set_int(opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
- /* Offset of shared objects hint table in the primary hint stream */
- pdf_set_int(opts->hints_s, opts->hints_shared_offset);
- /* Primary hint stream length */
- pdf_set_int(opts->hints_length, opts->hintstream_len);
- }
- /*
- * Make sure we have loaded objects from object streams.
- */
- static void preloadobjstms(pdf_document *xref)
- {
- pdf_obj *obj;
- int num;
- for (num = 0; num < xref->len; num++)
- {
- if (xref->table[num].type == 'o')
- {
- obj = pdf_load_object(xref, num, 0);
- pdf_drop_obj(obj);
- }
- }
- }
- /*
- * Save streams and objects to the output
- */
/*
 * Classify a single byte value. Newline, carriage return and tab count as
 * text; otherwise anything below 32 or above 127 counts as binary.
 * Returns 1 for binary, 0 for text.
 */
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return !(c >= 32 && c <= 127);
	}
}
- static int isbinarystream(fz_buffer *buf)
- {
- int i;
- for (i = 0; i < buf->len; i++)
- if (isbinary(buf->data[i]))
- return 1;
- return 0;
- }
- static fz_buffer *hexbuf(fz_context *ctx, unsigned char *p, int n)
- {
- static const char hex[16] = "0123456789abcdef";
- fz_buffer *buf;
- int x = 0;
- buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2);
- while (n--)
- {
- buf->data[buf->len++] = hex[*p >> 4];
- buf->data[buf->len++] = hex[*p & 15];
- if (++x == 32)
- {
- buf->data[buf->len++] = '\n';
- x = 0;
- }
- p++;
- }
- buf->data[buf->len++] = '>';
- buf->data[buf->len++] = '\n';
- return buf;
- }
- static void addhexfilter(pdf_document *xref, pdf_obj *dict)
- {
- pdf_obj *f, *dp, *newf, *newdp;
- pdf_obj *ahx, *nullobj;
- fz_context *ctx = xref->ctx;
- ahx = pdf_new_name(ctx, "ASCIIHexDecode");
- nullobj = pdf_new_null(ctx);
- newf = newdp = NULL;
- f = pdf_dict_gets(dict, "Filter");
- dp = pdf_dict_gets(dict, "DecodeParms");
- if (pdf_is_name(f))
- {
- newf = pdf_new_array(ctx, 2);
- pdf_array_push(newf, ahx);
- pdf_array_push(newf, f);
- f = newf;
- if (pdf_is_dict(dp))
- {
- newdp = pdf_new_array(ctx, 2);
- pdf_array_push(newdp, nullobj);
- pdf_array_push(newdp, dp);
- dp = newdp;
- }
- }
- else if (pdf_is_array(f))
- {
- pdf_array_insert(f, ahx);
- if (pdf_is_array(dp))
- pdf_array_insert(dp, nullobj);
- }
- else
- f = ahx;
- pdf_dict_puts(dict, "Filter", f);
- if (dp)
- pdf_dict_puts(dict, "DecodeParms", dp);
- pdf_drop_obj(ahx);
- pdf_drop_obj(nullobj);
- pdf_drop_obj(newf);
- pdf_drop_obj(newdp);
- }
- static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
- {
- fz_buffer *buf, *tmp;
- pdf_obj *newlen;
- pdf_obj *obj;
- fz_context *ctx = xref->ctx;
- int orig_num = opts->rev_renumber_map[num];
- int orig_gen = opts->rev_gen_list[num];
- buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
- obj = pdf_copy_dict(ctx, obj_orig);
- if (opts->do_ascii && isbinarystream(buf))
- {
- tmp = hexbuf(ctx, buf->data, buf->len);
- fz_drop_buffer(ctx, buf);
- buf = tmp;
- addhexfilter(xref, obj);
- newlen = pdf_new_int(ctx, buf->len);
- pdf_dict_puts(obj, "Length", newlen);
- pdf_drop_obj(newlen);
- }
- fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
- fprintf(opts->out, "stream\n");
- fwrite(buf->data, 1, buf->len, opts->out);
- fprintf(opts->out, "endstream\nendobj\n\n");
- fz_drop_buffer(ctx, buf);
- pdf_drop_obj(obj);
- }
- static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
- {
- fz_buffer *buf, *tmp;
- pdf_obj *newlen;
- pdf_obj *obj;
- fz_context *ctx = xref->ctx;
- int orig_num = opts->rev_renumber_map[num];
- int orig_gen = opts->rev_gen_list[num];
- int truncated = 0;
- buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen, (opts->continue_on_error ? &truncated : NULL));
- if (truncated && opts->errors)
- (*opts->errors)++;
- obj = pdf_copy_dict(ctx, obj_orig);
- pdf_dict_dels(obj, "Filter");
- pdf_dict_dels(obj, "DecodeParms");
- if (opts->do_ascii && isbinarystream(buf))
- {
- tmp = hexbuf(ctx, buf->data, buf->len);
- fz_drop_buffer(ctx, buf);
- buf = tmp;
- addhexfilter(xref, obj);
- }
- newlen = pdf_new_int(ctx, buf->len);
- pdf_dict_puts(obj, "Length", newlen);
- pdf_drop_obj(newlen);
- fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
- fprintf(opts->out, "stream\n");
- fwrite(buf->data, 1, buf->len, opts->out);
- fprintf(opts->out, "endstream\nendobj\n\n");
- fz_drop_buffer(ctx, buf);
- pdf_drop_obj(obj);
- }
/*
 * Return 1 if s exactly matches one of the filter names listed below
 * (full names and the abbreviations CCF, DCT and RL), 0 otherwise.
 */
static int is_image_filter(char *s)
{
	static const char *const image_filters[] = {
		"CCITTFaxDecode", "CCF",
		"DCTDecode", "DCT",
		"RunLengthDecode", "RL",
		"JBIG2Decode",
		"JPXDecode"
	};
	const int count = (int)(sizeof(image_filters) / sizeof(image_filters[0]));
	int k;

	for (k = 0; k < count; k++)
	{
		if (strcmp(s, image_filters[k]) == 0)
			return 1;
	}
	return 0;
}
- static int filter_implies_image(pdf_document *xref, pdf_obj *o)
- {
- if (!o)
- return 0;
- if (pdf_is_name(o))
- return is_image_filter(pdf_to_name(o));
- if (pdf_is_array(o))
- {
- int i, len;
- len = pdf_array_len(o);
- for (i = 0; i < len; i++)
- if (is_image_filter(pdf_to_name(pdf_array_get(o, i))))
- return 1;
- }
- return 0;
- }
- static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen)
- {
- pdf_obj *obj;
- pdf_obj *type;
- fz_context *ctx = xref->ctx;
- fz_try(ctx)
- {
- obj = pdf_load_object(xref, num, gen);
- }
- fz_catch(ctx)
- {
- if (opts->continue_on_error)
- {
- fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
- if (opts->errors)
- (*opts->errors)++;
- fz_warn(ctx, "%s", fz_caught(ctx));
- return;
- }
- else
- fz_rethrow(ctx);
- }
- /* skip ObjStm and XRef objects */
- if (pdf_is_dict(obj))
- {
- type = pdf_dict_gets(obj, "Type");
- if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm"))
- {
- opts->use_list[num] = 0;
- pdf_drop_obj(obj);
- return;
- }
- if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef"))
- {
- opts->use_list[num] = 0;
- pdf_drop_obj(obj);
- return;
- }
- }
- if (!pdf_is_stream(xref, num, gen))
- {
- fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
- fprintf(opts->out, "endobj\n\n");
- }
- else if (xref->table[num].stm_ofs < 0 && xref->table[num].stm_buf == NULL)
- {
- fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
- fprintf(opts->out, "stream\nendstream\nendobj\n\n");
- }
- else
- {
- int dontexpand = 0;
- if (opts->do_expand != 0 && opts->do_expand != fz_expand_all)
- {
- pdf_obj *o;
- if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) &&
- (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image")))
- dontexpand = !(opts->do_expand & fz_expand_images);
- if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font"))
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor"))
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length1")) != NULL)
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length2")) != NULL)
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length3")) != NULL)
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C"))
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C"))
- dontexpand = !(opts->do_expand & fz_expand_fonts);
- if (o = pdf_dict_gets(obj, "Filter"), filter_implies_image(xref, o))
- dontexpand = !(opts->do_expand & fz_expand_images);
- if (pdf_dict_gets(obj, "Width") != NULL && pdf_dict_gets(obj, "Height") != NULL)
- dontexpand = !(opts->do_expand & fz_expand_images);
- }
- fz_try(ctx)
- {
- if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
- expandstream(xref, opts, obj, num, gen);
- else
- copystream(xref, opts, obj, num, gen);
- }
- fz_catch(ctx)
- {
- if (opts->continue_on_error)
- {
- fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
- if (opts->errors)
- (*opts->errors)++;
- fz_warn(ctx, "%s", fz_caught(ctx));
- }
- else
- {
- pdf_drop_obj(obj);
- fz_rethrow(ctx);
- }
- }
- }
- pdf_drop_obj(obj);
- }
- static void writexref(pdf_document *xref, pdf_write_options *opts, int from, int to, int first, int main_xref_offset, int startxref)
- {
- pdf_obj *trailer = NULL;
- pdf_obj *obj;
- pdf_obj *nobj = NULL;
- int num;
- fz_context *ctx = xref->ctx;
- fprintf(opts->out, "xref\n%d %d\n", from, to - from);
- opts->first_xref_entry_offset = ftell(opts->out);
- for (num = from; num < to; num++)
- {
- if (opts->use_list[num])
- fprintf(opts->out, "%010d %05d n \n", opts->ofs_list[num], opts->gen_list[num]);
- else
- fprintf(opts->out, "%010d %05d f \n", opts->ofs_list[num], opts->gen_list[num]);
- }
- fprintf(opts->out, "\n");
- fz_var(trailer);
- fz_var(nobj);
- fz_try(ctx)
- {
- trailer = pdf_new_dict(ctx, 5);
- nobj = pdf_new_int(ctx, to);
- pdf_dict_puts(trailer, "Size", nobj);
- pdf_drop_obj(nobj);
- nobj = NULL;
- if (first)
- {
- obj = pdf_dict_gets(xref->trailer, "Info");
- if (obj)
- pdf_dict_puts(trailer, "Info", obj);
- obj = pdf_dict_gets(xref->trailer, "Root");
- if (obj)
- pdf_dict_puts(trailer, "Root", obj);
- obj = pdf_dict_gets(xref->trailer, "ID");
- if (obj)
- pdf_dict_puts(trailer, "ID", obj);
- }
- if (main_xref_offset != 0)
- {
- nobj = pdf_new_int(ctx, main_xref_offset);
- pdf_dict_puts(trailer, "Prev", nobj);
- pdf_drop_obj(nobj);
- nobj = NULL;
- }
- }
- fz_always(ctx)
- {
- pdf_drop_obj(nobj);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- fprintf(opts->out, "trailer\n");
- pdf_fprint_obj(opts->out, trailer, opts->do_expand == 0);
- fprintf(opts->out, "\n");
- pdf_drop_obj(trailer);
- fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref);
- }
/*
 * Write newline characters to file until its position reaches target.
 * The current position must not already be beyond target (asserted).
 */
static void
padto(FILE *file, int target)
{
	int cursor = ftell(file);

	assert(cursor <= target);
	for (; cursor < target; cursor++)
		fputc('\n', file);
}
- static void
- dowriteobject(pdf_document *xref, pdf_write_options *opts, int num, int pass)
- {
- if (xref->table[num].type == 'f')
- opts->gen_list[num] = xref->table[num].gen;
- if (xref->table[num].type == 'n')
- opts->gen_list[num] = xref->table[num].gen;
- if (xref->table[num].type == 'o')
- opts->gen_list[num] = 0;
- /* If we are renumbering, then make sure all generation numbers are
- * zero (except object 0 which must be free, and have a gen number of
- * 65535). Changing the generation numbers (and indeed object numbers)
- * will break encryption - so only do this if we are renumbering
- * anyway. */
- if (opts->do_garbage >= 2)
- opts->gen_list[num] = (num == 0 ? 65535 : 0);
- if (opts->do_garbage && !opts->use_list[num])
- return;
- if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
- {
- if (pass > 0)
- padto(opts->out, opts->ofs_list[num]);
- opts->ofs_list[num] = ftell(opts->out);
- writeobject(xref, opts, num, opts->gen_list[num]);
- }
- else
- opts->use_list[num] = 0;
- }
- static void
- writeobjects(pdf_document *xref, pdf_write_options *opts, int pass)
- {
- int num;
- fprintf(opts->out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
- fprintf(opts->out, "%%\316\274\341\277\246\n\n");
- dowriteobject(xref, opts, opts->start, pass);
- if (opts->do_linear)
- {
- /* Write first xref */
- if (pass == 0)
- opts->first_xref_offset = ftell(opts->out);
- else
- padto(opts->out, opts->first_xref_offset);
- writexref(xref, opts, opts->start, xref->len, 1, opts->main_xref_offset, 0);
- }
- for (num = opts->start+1; num < xref->len; num++)
- dowriteobject(xref, opts, num, pass);
- if (opts->do_linear && pass == 1)
- {
- int offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
- padto(opts->out, offset);
- }
- for (num = 1; num < opts->start; num++)
- {
- if (pass == 1)
- opts->ofs_list[num] += opts->hintstream_len;
- dowriteobject(xref, opts, num, pass);
- }
- }
/*
 * Number of bits needed to represent x: 0 for x <= 0, otherwise the
 * position of the highest set bit counted from 1 (1 -> 1, 2..3 -> 2,
 * 4..7 -> 3, ...).
 *
 * The count is done on an unsigned copy by shifting right, so no signed
 * shift can overflow. Values with the second-highest bit of int set are
 * reported with their true bit count instead of being collapsed to 0.
 */
static int
my_log2(int x)
{
	unsigned int v;
	int bits = 0;

	if (x <= 0)
		return 0;

	for (v = (unsigned int)x; v != 0; v >>= 1)
		bits++;

	return bits;
}
- static void
- make_page_offset_hints(pdf_document *xref, pdf_write_options *opts, fz_buffer *buf)
- {
- fz_context *ctx = xref->ctx;
- int i, j;
- int min_objs_per_page, max_objs_per_page;
- int min_page_length, max_page_length;
- int objs_per_page_bits;
- int min_shared_object, max_shared_object;
- int max_shared_object_refs;
- int min_shared_length, max_shared_length;
- page_objects **pop = &opts->page_object_lists->page[0];
- int page_len_bits, shared_object_bits, shared_object_id_bits;
- int shared_length_bits;
- min_shared_object = xref->len;
- max_shared_object = 1;
- min_shared_length = opts->file_len;
- max_shared_length = 0;
- for (i=1; i < xref->len; i++)
- {
- int min, max, page;
- min = opts->ofs_list[i];
- if (i == opts->start-1 || (opts->start == 1 && i == xref->len-1))
- max = opts->main_xref_offset;
- else if (i == xref->len-1)
- max = opts->ofs_list[1];
- else
- max = opts->ofs_list[i+1];
- assert(max > min);
- if (opts->use_list[i] & USE_SHARED)
- {
- page = -1;
- if (i < min_shared_object)
- min_shared_object = i;
- if (i > max_shared_object)
- max_shared_object = i;
- if (min_shared_length > max - min)
- min_shared_length = max - min;
- if (max_shared_length < max - min)
- max_shared_length = max - min;
- }
- else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
- page = -1;
- else if (opts->use_list[i] & USE_PAGE1)
- {
- page = 0;
- if (min_shared_length > max - min)
- min_shared_length = max - min;
- if (max_shared_length < max - min)
- max_shared_length = max - min;
- }
- else if (opts->use_list[i] == 0)
- page = -1;
- else
- page = opts->use_list[i]>>USE_PAGE_SHIFT;
- if (page >= 0)
- {
- pop[page]->num_objects++;
- if (pop[page]->min_ofs > min)
- pop[page]->min_ofs = min;
- if (pop[page]->max_ofs < max)
- pop[page]->max_ofs = max;
- }
- }
- min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
- min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
- for (i=1; i < opts->page_count; i++)
- {
- int tmp;
- if (min_objs_per_page > pop[i]->num_objects)
- min_objs_per_page = pop[i]->num_objects;
- if (max_objs_per_page < pop[i]->num_objects)
- max_objs_per_page = pop[i]->num_objects;
- tmp = pop[i]->max_ofs - pop[i]->min_ofs;
- if (tmp < min_page_length)
- min_page_length = tmp;
- if (tmp > max_page_length)
- max_page_length = tmp;
- }
- for (i=0; i < opts->page_count; i++)
- {
- int count = 0;
- int j;
- page_objects *po = opts->page_object_lists->page[i];
- for (j = 0; j < po->len; j++)
- {
- if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
- count++;
- else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
- count++;
- }
- po->num_shared = count;
- if (i == 0 || count > max_shared_object_refs)
- max_shared_object_refs = count;
- }
- if (min_shared_object > max_shared_object)
- min_shared_object = max_shared_object = 0;
- /* Table F.3 - Header */
- /* Header Item 1: Least number of objects in a page */
- fz_write_buffer_bits(ctx, buf, min_objs_per_page, 32);
- /* Header Item 2: Location of first pages page object */
- fz_write_buffer_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
- /* Header Item 3: Number of bits required to represent the difference
- * between the greatest and least number of objects in a page. */
- objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
- fz_write_buffer_bits(ctx, buf, objs_per_page_bits, 16);
- /* Header Item 4: Least length of a page. */
- fz_write_buffer_bits(ctx, buf, min_page_length, 32);
- /* Header Item 5: Number of bits needed to represent the difference
- * between the greatest and least length of a page. */
- page_len_bits = my_log2(max_page_length - min_page_length);
- fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
- /* Header Item 6: Least offset to start of content stream (Acrobat
- * sets this to always be 0) */
- fz_write_buffer_bits(ctx, buf, 0, 32);
- /* Header Item 7: Number of bits needed to represent the difference
- * between the greatest and least offset to content stream (Acrobat
- * sets this to always be 0) */
- fz_write_buffer_bits(ctx, buf, 0, 16);
- /* Header Item 8: Least content stream length. (Acrobat
- * sets this to always be 0) */
- fz_write_buffer_bits(ctx, buf, 0, 32);
- /* Header Item 9: Number of bits needed to represent the difference
- * between the greatest and least content stream length (Acrobat
- * sets this to always be the same as item 5) */
- fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
- /* Header Item 10: Number of bits needed to represent the greatest
- * number of shared object references. */
- shared_object_bits = my_log2(max_shared_object_refs);
- fz_write_buffer_bits(ctx, buf, shared_object_bits, 16);
- /* Header Item 11: Number of bits needed to represent the greatest
- * shared object identifier. */
- shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
- fz_write_buffer_bits(ctx, buf, shared_object_id_bits, 16);
- /* Header Item 12: Number of bits needed to represent the numerator
- * of the fractions. We always send 0. */
- fz_write_buffer_bits(ctx, buf, 0, 16);
- /* Header Item 13: Number of bits needed to represent the denominator
- * of the fractions. We always send 0. */
- fz_write_buffer_bits(ctx, buf, 0, 16);
- /* Table F.4 - Page offset hint table (per page) */
- /* Item 1: A number that, when added to the least number of objects
- * on a page, gives the number of objects in the page. */
- for (i = 0; i < opts->page_count; i++)
- {
- fz_write_buffer_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 2: A number that, when added to the least page length, gives
- * the length of the page in bytes. */
- for (i = 0; i < opts->page_count; i++)
- {
- fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 3: The number of shared objects referenced from the page. */
- for (i = 0; i < opts->page_count; i++)
- {
- fz_write_buffer_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 4: Shared object id for each shared object ref in every page.
- * Spec says "not for page 1", but acrobat does send page 1's - all
- * as zeros. */
- for (i = 0; i < opts->page_count; i++)
- {
- for (j = 0; j < pop[i]->len; j++)
- {
- int o = pop[i]->object[j];
- if (i == 0 && opts->use_list[o] & USE_PAGE1)
- fz_write_buffer_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
- if (i != 0 && opts->use_list[o] & USE_SHARED)
- fz_write_buffer_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
- }
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 5: Numerator of fractional position for each shared object reference. */
- /* We always send 0 in 0 bits */
- /* Item 6: A number that, when added to the least offset to the start
- * of the content stream (F.3 Item 6), gives the offset in bytes of
- * start of the pages content stream object relative to the beginning
- * of the page. Always 0 in 0 bits. */
- /* Item 7: A number that, when added to the least content stream length
- * (F.3 Item 8), gives the length of the pages content stream object.
- * Always == Item 2 as least content stream length = least page stream
- * length.
- */
- for (i = 0; i < opts->page_count; i++)
- {
- fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
- }
- /* Pad, and then do shared object hint table */
- fz_write_buffer_pad(ctx, buf);
- opts->hints_shared_offset = buf->len;
- /* Table F.5: */
- /* Header Item 1: Object number of the first object in the shared
- * objects section. */
- fz_write_buffer_bits(ctx, buf, min_shared_object, 32);
- /* Header Item 2: Location of first object in the shared objects
- * section. */
- fz_write_buffer_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
- /* Header Item 3: The number of shared object entries for the first
- * page. */
- fz_write_buffer_bits(ctx, buf, pop[0]->num_shared, 32);
- /* Header Item 4: The number of shared object entries for the shared
- * objects section + first page. */
- fz_write_buffer_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
- /* Header Item 5: The number of bits needed to represent the greatest
- * number of objects in a shared object group (Always 0). */
- fz_write_buffer_bits(ctx, buf, 0, 16);
- /* Header Item 6: The least length of a shared object group in bytes. */
- fz_write_buffer_bits(ctx, buf, min_shared_length, 32);
- /* Header Item 7: The number of bits required to represent the
- * difference between the greatest and least length of a shared object
- * group. */
- shared_length_bits = my_log2(max_shared_length - min_shared_length);
- fz_write_buffer_bits(ctx, buf, shared_length_bits, 16);
- /* Table F.6 */
- /* Item 1: Shared object group length (page 1 objects) */
- for (j = 0; j < pop[0]->len; j++)
- {
- int o = pop[0]->object[j];
- int min, max;
- min = opts->ofs_list[o];
- if (o == opts->start-1)
- max = opts->main_xref_offset;
- else if (o < xref->len-1)
- max = opts->ofs_list[o+1];
- else
- max = opts->ofs_list[1];
- if (opts->use_list[o] & USE_PAGE1)
- fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
- }
- /* Item 1: Shared object group length (shared objects) */
- for (i = min_shared_object; i <= max_shared_object; i++)
- {
- int min, max;
- min = opts->ofs_list[i];
- if (i == opts->start-1)
- max = opts->main_xref_offset;
- else if (i < xref->len-1)
- max = opts->ofs_list[i+1];
- else
- max = opts->ofs_list[1];
- fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 2: MD5 presence flags */
- for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
- {
- fz_write_buffer_bits(ctx, buf, 0, 1);
- }
- fz_write_buffer_pad(ctx, buf);
- /* Item 3: MD5 sums (not present) */
- fz_write_buffer_pad(ctx, buf);
- /* Item 4: Number of objects in the group (not present) */
- }
- static void
- make_hint_stream(pdf_document *xref, pdf_write_options *opts)
- {
- fz_context *ctx = xref->ctx;
- fz_buffer *buf = fz_new_buffer(ctx, 100);
- fz_try(ctx)
- {
- make_page_offset_hints(xref, opts, buf);
- pdf_update_stream(xref, xref->len-1, buf);
- opts->hintstream_len = buf->len;
- fz_drop_buffer(ctx, buf);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, buf);
- fz_rethrow(ctx);
- }
- }
#ifdef DEBUG_WRITING
/*
 * Debugging aid, only compiled when DEBUG_WRITING is defined: prints one
 * line to stderr per xref slot showing the slot index, its ofs_list entry
 * and its use_list entry.
 */
static void dump_object_details(pdf_document *xref, pdf_write_options *opts)
{
	int num = 0;

	while (num < xref->len)
	{
		fprintf(stderr, "%d@%d: use=%d\n", num, opts->ofs_list[num], opts->use_list[num]);
		num++;
	}
}
#endif
- void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz_opts)
- {
- int lastfree;
- int num;
- pdf_write_options opts = { 0 };
- fz_context *ctx;
- if (!xref)
- return;
- ctx = xref->ctx;
- opts.out = fopen(filename, "wb");
- if (!opts.out)
- fz_throw(ctx, "cannot open output file '%s'", filename);
- fz_try(ctx)
- {
- opts.do_expand = fz_opts ? fz_opts->do_expand : 0;
- opts.do_garbage = fz_opts ? fz_opts->do_garbage : 0;
- opts.do_ascii = fz_opts ? fz_opts->do_ascii: 0;
- opts.do_linear = fz_opts ? fz_opts->do_linear: 0;
- opts.start = 0;
- opts.main_xref_offset = INT_MIN;
- /* We deliberately make these arrays long enough to cope with
- * 1 to n access rather than 0..n-1, and add space for 2 new
- * extra entries that may be required for linearization. */
- opts.use_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
- opts.ofs_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
- opts.gen_list = fz_calloc(ctx, xref->len + 3, sizeof(int));
- opts.renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
- opts.rev_renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
- opts.rev_gen_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
- opts.continue_on_error = fz_opts->continue_on_error;
- opts.errors = fz_opts->errors;
- for (num = 0; num < xref->len; num++)
- {
- opts.use_list[num] = 0;
- opts.ofs_list[num] = 0;
- opts.renumber_map[num] = num;
- opts.rev_renumber_map[num] = num;
- opts.rev_gen_list[num] = xref->table[num].gen;
- }
- /* Make sure any objects hidden in compressed streams have been loaded */
- preloadobjstms(xref);
- /* Sweep & mark objects from the trailer */
- if (opts.do_garbage >= 1)
- sweepobj(xref, &opts, xref->trailer);
- else
- for (num = 0; num < xref->len; num++)
- opts.use_list[num] = 1;
- /* Coalesce and renumber duplicate objects */
- if (opts.do_garbage >= 3)
- removeduplicateobjs(xref, &opts);
- /* Compact xref by renumbering and removing unused objects */
- if (opts.do_garbage >= 2 || opts.do_linear)
- compactxref(xref, &opts);
- /* Make renumbering affect all indirect references and update xref */
- if (opts.do_garbage >= 2 || opts.do_linear)
- renumberobjs(xref, &opts);
- if (opts.do_linear)
- {
- linearize(xref, &opts);
- }
- writeobjects(xref, &opts, 0);
- #ifdef DEBUG_WRITING
- dump_object_details(xref, &opts);
- #endif
- /* Construct linked list of free object slots */
- lastfree = 0;
- for (num = 0; num < xref->len; num++)
- {
- if (!opts.use_list[num])
- {
- opts.gen_list[num]++;
- opts.ofs_list[lastfree] = num;
- lastfree = num;
- }
- }
- if (opts.do_linear)
- {
- opts.main_xref_offset = ftell(opts.out);
- writexref(xref, &opts, 0, opts.start, 0, 0, opts.first_xref_offset);
- opts.file_len = ftell(opts.out);
- make_hint_stream(xref, &opts);
- opts.file_len += opts.hintstream_len;
- opts.main_xref_offset += opts.hintstream_len;
- update_linearization_params(xref, &opts);
- fseek(opts.out, 0, 0);
- writeobjects(xref, &opts, 1);
- padto(opts.out, opts.main_xref_offset);
- writexref(xref, &opts, 0, opts.start, 0, 0, opts.first_xref_offset);
- }
- else
- {
- opts.first_xref_offset = ftell(opts.out);
- writexref(xref, &opts, 0, xref->len, 1, 0, opts.first_xref_offset);
- }
- xref->dirty = 0;
- }
- fz_always(ctx)
- {
- #ifdef DEBUG_LINEARIZATION
- page_objects_dump(&opts);
- objects_dump(xref, &opts);
- #endif
- fz_free(ctx, opts.use_list);
- fz_free(ctx, opts.ofs_list);
- fz_free(ctx, opts.gen_list);
- fz_free(ctx, opts.renumber_map);
- fz_free(ctx, opts.rev_renumber_map);
- fz_free(ctx, opts.rev_gen_list);
- pdf_drop_obj(opts.linear_l);
- pdf_drop_obj(opts.linear_h0);
- pdf_drop_obj(opts.linear_h1);
- pdf_drop_obj(opts.linear_o);
- pdf_drop_obj(opts.linear_e);
- pdf_drop_obj(opts.linear_n);
- pdf_drop_obj(opts.linear_t);
- pdf_drop_obj(opts.hints_s);
- pdf_drop_obj(opts.hints_length);
- page_objects_list_destroy(ctx, opts.page_object_lists);
- fclose(opts.out);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
|