gprof.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. /*-
  2. * Copyright (c) 1983, 1992, 1993
  3. * The Regents of the University of California. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 4. Neither the name of the University nor the names of its contributors
  14. * may be used to endorse or promote products derived from this software
  15. * without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. // Modified based on
  30. // - https://github.com/bminor/newlib/blob/master/winsup/cygwin/gmon.c
  31. // - https://github.com/bminor/newlib/blob/master/winsup/cygwin/local_includes/gmon.h
  32. #include <stdlib.h>
  33. #include <stdio.h>
  34. #include <stdint.h>
  35. #include <string.h>
  36. #include "gprof_api.h"
  37. //#define DEBUG
  38. /*
  39. * Structure prepended to gmon.out profiling data file.
  40. */
  41. struct gmonhdr {
  42. size_t lpc; /* base pc address of sample buffer */
  43. size_t hpc; /* max pc address of sampled buffer */
  44. int ncnt; /* size of sample buffer (plus this header) */
  45. int version; /* version number */
  46. int profrate; /* profiling clock rate */
  47. int spare[3]; /* reserved */
  48. };
  49. #define GMONVERSION 0x00051879
  50. /*
  51. * histogram counters are unsigned shorts (according to the kernel).
  52. */
  53. #define HISTCOUNTER unsigned short
  54. /*
  55. * fraction of text space to allocate for histogram counters here, 1/2
  56. */
  57. #define HISTFRACTION 2
  58. /*
  59. * Fraction of text space to allocate for from hash buckets.
  60. * The value of HASHFRACTION is based on the minimum number of bytes
  61. * of separation between two subroutine call points in the object code.
  62. * Given MIN_SUBR_SEPARATION bytes of separation the value of
  63. * HASHFRACTION is calculated as:
  64. *
  65. * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
  66. *
  67. * For example, on the VAX, the shortest two call sequence is:
  68. *
  69. * calls $0,(r0)
  70. * calls $0,(r0)
  71. *
  72. * which is separated by only three bytes, thus HASHFRACTION is
  73. * calculated as:
  74. *
  75. * HASHFRACTION = 3 / (2 * 2 - 1) = 1
  76. *
  77. * Note that the division above rounds down, thus if MIN_SUBR_FRACTION
  78. * is less than three, this algorithm will not work!
  79. *
  80. * In practice, however, call instructions are rarely at a minimal
  81. * distance. Hence, we will define HASHFRACTION to be 2 across all
  82. * architectures. This saves a reasonable amount of space for
  83. * profiling data structures without (in practice) sacrificing
  84. * any granularity.
  85. */
  86. #define HASHFRACTION 2
  87. /*
  88. * percent of text space to allocate for tostructs with a minimum.
  89. */
  90. #define ARCDENSITY 2 /* this is in percentage, relative to text size! */
  91. #define MINARCS 50
  92. #define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2)
  93. struct tostruct {
  94. size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */
  95. long count; /* how many times it has been called */
  96. unsigned short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */
  97. unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */
  98. };
  99. /*
  100. * a raw arc, with pointers to the calling site and
  101. * the called site and a count.
  102. */
  103. struct rawarc {
  104. size_t raw_frompc;
  105. size_t raw_selfpc;
  106. long raw_count;
  107. };
  108. /*
  109. * general rounding functions.
  110. */
  111. #define ROUNDDOWN(x,y) (((x)/(y))*(y))
  112. #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
  113. /*
  114. * The profiling data structures are housed in this structure.
  115. */
  116. struct gmonparam {
  117. int state;
  118. int already_setup;
  119. unsigned short *kcount; /* histogram PC sample array */
  120. size_t kcountsize; /* size of kcount[] array in bytes */
  121. unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */
  122. size_t fromssize; /* size of froms[] array in bytes */
  123. struct tostruct *tos; /* to struct, contains histogram counter */
  124. size_t tossize; /* size of tos[] array in bytes */
  125. long tolimit;
  126. size_t lowpc; /* low program counter of area */
  127. size_t highpc; /* high program counter */
  128. size_t textsize; /* code size */
  129. size_t scale;
  130. };
  131. /* gprof data structure */
  132. struct gprofdata {
  133. char *buf;
  134. uint32_t size;
  135. };
  136. /* convert an addr to an index */
  137. #define PROFIDX(pc, base, scale) \
  138. ({ \
  139. size_t i = (pc - base) / 2; \
  140. if (sizeof (unsigned long long int) > sizeof (size_t)) { \
  141. i = (unsigned long long int) i * scale / 65536; \
  142. } else { \
  143. i = i / 65536 * scale + i % 65536 * scale / 65536; \
  144. } \
  145. i; \
  146. })
  147. /* convert an index into an address */
  148. #define PROFADDR(idx, base, scale) \
  149. ((base) + ((((unsigned long long)(idx) << 16) \
  150. / (unsigned long long)(scale)) << 1))
  151. /*
  152. * Possible states of profiling.
  153. */
  154. #define GMON_PROF_ON 0
  155. #define GMON_PROF_BUSY 1
  156. #define GMON_PROF_ERROR 2
  157. #define GMON_PROF_OFF 3
  158. #define ERR(s) fprintf(stderr, "%s", s)
  159. static struct gmonparam _gmonparam = { GMON_PROF_OFF, 0, NULL,
  160. 0, NULL, 0, NULL, 0, 0L, 0, 0, 0, 0 };
  161. /* see profil(2) where this is described (incorrectly) */
  162. #define SCALE_1_TO_1 0x10000L
  163. #define GMONPARAM (&_gmonparam)
  164. /* Where the gprof data stored after execute gprof_collect(0) */
  165. struct gprofdata gprof_data = {NULL, 0};
  166. /*
  167. * Control profiling
  168. * profiling is what mcount checks to see if
  169. * all the data structures are ready.
  170. */
  171. static void moncontrol(int mode)
  172. {
  173. struct gmonparam *p = GMONPARAM;
  174. if (mode) {
  175. /* start */
  176. gprof_on();
  177. p->state = GMON_PROF_ON;
  178. } else {
  179. /* stop */
  180. gprof_off();
  181. p->state = GMON_PROF_OFF;
  182. }
  183. }
  184. static void monstartup(size_t lowpc, size_t highpc)
  185. {
  186. register size_t o;
  187. char *cp;
  188. struct gmonparam *p = GMONPARAM;
  189. /*
  190. * round lowpc and highpc to multiples of the density we're using
  191. * so the rest of the scaling (here and in gprof) stays in ints.
  192. */
  193. p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
  194. p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
  195. p->textsize = p->highpc - p->lowpc;
  196. p->kcountsize = p->textsize / HISTFRACTION;
  197. p->fromssize = p->textsize / HASHFRACTION;
  198. p->tolimit = p->textsize * ARCDENSITY / 100;
  199. if (p->tolimit < MINARCS) {
  200. p->tolimit = MINARCS;
  201. } else if (p->tolimit > MAXARCS) {
  202. p->tolimit = MAXARCS;
  203. }
  204. p->tossize = p->tolimit * sizeof(struct tostruct);
  205. cp = malloc(p->kcountsize + p->fromssize + p->tossize);
  206. if (cp == (char*) NULL) {
  207. p->state = GMON_PROF_ERROR;
  208. ERR("monstartup: out of memory\n");
  209. return;
  210. }
  211. /* zero out cp as value will be added there */
  212. memset(cp, 0, p->kcountsize + p->fromssize + p->tossize);
  213. p->tos = (struct tostruct*) cp;
  214. cp += p->tossize;
  215. p->kcount = (unsigned short*) cp;
  216. cp += p->kcountsize;
  217. p->froms = (unsigned short*) cp;
  218. p->tos[0].link = 0;
  219. o = p->highpc - p->lowpc;
  220. if (p->kcountsize < o) {
  221. #ifndef notdef
  222. p->scale = ((float) p->kcountsize / o) * SCALE_1_TO_1;
  223. #else /* avoid floating point */
  224. int quot = o / p->kcountsize;
  225. if (quot >= 0x10000)
  226. p->scale = 1;
  227. else if (quot >= 0x100)
  228. p->scale = 0x10000 / quot;
  229. else if (o >= 0x800000)
  230. p->scale = 0x1000000 / (o / (p->kcountsize >> 8));
  231. else
  232. p->scale = 0x1000000 / ((o << 8) / p->kcountsize);
  233. #endif
  234. } else {
  235. p->scale = SCALE_1_TO_1;
  236. }
  237. moncontrol(1); /* start */
  238. }
  239. void _mcount(uint32_t *frompcindex)
  240. {
  241. register uint32_t *selfpc asm("ra");
  242. register struct tostruct *top;
  243. register struct tostruct *prevtop;
  244. register long toindex;
  245. struct gmonparam *p = GMONPARAM;
  246. if (!p->already_setup) {
  247. p->already_setup = 1;
  248. monstartup((size_t) PROGRAM_LOWPC, (size_t) PROGRAM_HIGHPC);
  249. }
  250. /*
  251. * check that we are profiling
  252. * and that we aren't recursively invoked.
  253. */
  254. if (p->state != GMON_PROF_ON) {
  255. goto out;
  256. }
  257. p->state++;
  258. /*
  259. * check that frompcindex is a reasonable pc value.
  260. * for example: signal catchers get called from the stack,
  261. * not from text space. too bad.
  262. */
  263. frompcindex = (uint32_t*) ((unsigned long) frompcindex - (unsigned long) p->lowpc);
  264. if ((unsigned long) frompcindex > p->textsize) {
  265. goto done;
  266. }
  267. frompcindex = (uint32_t*) &p->froms[((unsigned long) frompcindex)
  268. / (HASHFRACTION * sizeof(*p->froms))];
  269. toindex = *((unsigned short*) frompcindex); /* get froms[] value */
  270. if (toindex == 0) {
  271. /*
  272. * first time traversing this arc
  273. */
  274. toindex = ++p->tos[0].link; /* the link of tos[0] points to the last used record in the array */
  275. if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */
  276. goto overflow;
  277. }
  278. *((unsigned short*) frompcindex) = (unsigned short) toindex; /* store new 'to' value into froms[] */
  279. top = &p->tos[toindex];
  280. top->selfpc = (size_t) selfpc;
  281. top->count = 1;
  282. top->link = 0;
  283. goto done;
  284. }
  285. top = &p->tos[toindex];
  286. if (top->selfpc == (size_t) selfpc) {
  287. /*
  288. * arc at front of chain; usual case.
  289. */
  290. top->count++;
  291. goto done;
  292. }
  293. /*
  294. * have to go looking down chain for it.
  295. * top points to what we are looking at,
  296. * prevtop points to previous top.
  297. * we know it is not at the head of the chain.
  298. */
  299. for (; /* goto done */;) {
  300. if (top->link == 0) {
  301. /*
  302. * top is end of the chain and none of the chain
  303. * had top->selfpc == selfpc.
  304. * so we allocate a new tostruct
  305. * and link it to the head of the chain.
  306. */
  307. toindex = ++p->tos[0].link;
  308. if (toindex >= p->tolimit) {
  309. goto overflow;
  310. }
  311. top = &p->tos[toindex];
  312. top->selfpc = (size_t) selfpc;
  313. top->count = 1;
  314. top->link = *((unsigned short*) frompcindex);
  315. *(unsigned short*) frompcindex = (unsigned short) toindex;
  316. goto done;
  317. }
  318. /*
  319. * otherwise, check the next arc on the chain.
  320. */
  321. prevtop = top;
  322. top = &p->tos[top->link];
  323. if (top->selfpc == (size_t) selfpc) {
  324. /*
  325. * there it is.
  326. * increment its count
  327. * move it to the head of the chain.
  328. */
  329. top->count++;
  330. toindex = prevtop->link;
  331. prevtop->link = top->link;
  332. top->link = *((unsigned short*) frompcindex);
  333. *((unsigned short*) frompcindex) = (unsigned short) toindex;
  334. goto done;
  335. }
  336. }
  337. done: p->state--;
  338. /* and fall through */
  339. out: return; /* normal return restores saved registers */
  340. overflow: p->state++; /* halt further profiling */
  341. #define TOLIMIT "mcount: tos overflow\n"
  342. printf("%s", TOLIMIT);
  343. goto out;
  344. }
  345. #define NUM_OCTETS_PER_LINE 20
  346. #define FLUSH_OUTPUT() fflush(stdout)
  347. static void hexdumpbuf(char *buf, unsigned long sz)
  348. {
  349. unsigned long rem, cur = 0, i = 0;
  350. FLUSH_OUTPUT();
  351. while (cur < sz) {
  352. rem = ((sz - cur) < NUM_OCTETS_PER_LINE) ? (sz - cur) : NUM_OCTETS_PER_LINE;
  353. for (i = 0; i < rem; i++) {
  354. printf("%02x", buf[cur + i]);
  355. }
  356. printf("\n");
  357. FLUSH_OUTPUT();
  358. cur += rem;
  359. }
  360. }
  361. long gprof_collect(unsigned long interface)
  362. {
  363. static const char gmon_out[] = "gmon.out";
  364. FILE *fp = NULL;
  365. int hz;
  366. int fromindex;
  367. int endfrom;
  368. size_t frompc;
  369. int toindex;
  370. struct rawarc rawarc;
  371. struct gmonparam *p = GMONPARAM;
  372. struct gmonhdr gmonhdr, *hdr;
  373. const char *proffile;
  374. char *bufptr;
  375. #ifdef DEBUG
  376. int log, len;
  377. char dbuf[200];
  378. #endif
  379. if (p->state == GMON_PROF_ERROR) {
  380. ERR("_mcleanup: tos overflow\n");
  381. return -1;
  382. }
  383. hz = PROF_HZ;
  384. moncontrol(0); /* stop */
  385. if (interface == 0) {
  386. gprof_data.size = 0;
  387. if (gprof_data.buf == NULL) {
  388. gprof_data.buf = malloc(sizeof(gmonhdr) + p->kcountsize + p->tolimit * sizeof(struct rawarc));
  389. if (gprof_data.buf == NULL) {
  390. ERR("gprof_collect: unable to malloc enough memory to store gprof data\n");
  391. return -1;
  392. }
  393. }
  394. bufptr = gprof_data.buf;
  395. } else if (interface == 1) {
  396. proffile = gmon_out;
  397. fp = fopen(proffile, "wb");
  398. if (fp == NULL) {
  399. printf("Unable to open %s\n", proffile);
  400. return -1;
  401. }
  402. } else {
  403. printf("\nDump profiling data start\n");
  404. }
  405. #ifdef DEBUG
  406. len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n",
  407. p->kcount, p->kcountsize);
  408. printf("%s", dbuf);
  409. #endif
  410. hdr = (struct gmonhdr*) &gmonhdr;
  411. hdr->lpc = p->lowpc;
  412. hdr->hpc = p->highpc;
  413. hdr->ncnt = p->kcountsize + sizeof(gmonhdr);
  414. hdr->version = GMONVERSION;
  415. hdr->profrate = hz;
  416. if (interface == 0) {
  417. memcpy(bufptr, (void*) hdr, sizeof *hdr);
  418. bufptr += sizeof *hdr;
  419. memcpy(bufptr, (void*) p->kcount, p->kcountsize);
  420. bufptr += p->kcountsize;
  421. } else if (interface == 1) {
  422. fwrite((const char*) hdr, 1, sizeof *hdr, fp);
  423. fwrite((const char*) p->kcount, 1, p->kcountsize, fp);
  424. } else {
  425. hexdumpbuf((char*) hdr, sizeof *hdr);
  426. hexdumpbuf((char *)p->kcount, (unsigned long)p->kcountsize);
  427. }
  428. #ifdef DEBUG
  429. len = sprintf(dbuf, "[mcleanup1] lowpc 0x%lx highpc 0x%lx ncnt %d\n",
  430. hdr->lpc, hdr->hpc, hdr->ncnt);
  431. printf("%s", dbuf);
  432. #endif
  433. endfrom = p->fromssize / sizeof(*p->froms);
  434. for (fromindex = 0; fromindex < endfrom; fromindex++) {
  435. if (p->froms[fromindex] == 0) {
  436. continue;
  437. }
  438. frompc = p->lowpc;
  439. frompc += fromindex * HASHFRACTION * sizeof(*p->froms);
  440. for (toindex = p->froms[fromindex]; toindex != 0; toindex =
  441. p->tos[toindex].link) {
  442. #ifdef DEBUG
  443. len = sprintf(dbuf,
  444. "[mcleanup2] frompc 0x%x selfpc 0x%x count %d\n" ,
  445. frompc, p->tos[toindex].selfpc,
  446. p->tos[toindex].count);
  447. printf("%s", dbuf);
  448. #endif
  449. rawarc.raw_frompc = frompc;
  450. rawarc.raw_selfpc = p->tos[toindex].selfpc;
  451. rawarc.raw_count = p->tos[toindex].count;
  452. if (interface == 0) {
  453. memcpy(bufptr, (void*) &rawarc, sizeof rawarc);
  454. bufptr += sizeof rawarc;
  455. } else if (interface == 1) {
  456. fwrite((const char*)&rawarc, 1, sizeof rawarc, fp);
  457. } else {
  458. hexdumpbuf((char *)(&rawarc), (unsigned long)(sizeof rawarc));
  459. }
  460. }
  461. }
  462. if (interface == 0) {
  463. gprof_data.size = bufptr - gprof_data.buf;
  464. printf("Collected gprof data @0x%lx, size %lu bytes\n", gprof_data.buf, gprof_data.size);
  465. } else if (interface == 1) {
  466. fclose(fp);
  467. printf("Write %s done!\n", gmon_out);
  468. } else {
  469. printf("\nCREATE: %s\n", gmon_out);
  470. printf("\nDump profiling data finished\n");
  471. }
  472. return 0;
  473. }
  474. /* sample the current program counter */
  475. void gprof_sample(unsigned long pc)
  476. {
  477. size_t idx;
  478. struct gmonparam *p = GMONPARAM;
  479. if (p->state == GMON_PROF_ON) {
  480. if (pc >= p->lowpc && pc < p->highpc) {
  481. idx = PROFIDX(pc, p->lowpc, p->scale);
  482. p->kcount[idx]++;
  483. }
  484. }
  485. }