| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- /*
- Author : Shay Gal-On, EEMBC
- This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
- All rights reserved.
- EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
- CoreMark License that is distributed with the official EEMBC COREMARK Software release.
- If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
- you must discontinue use and download the official release from www.coremark.org.
- Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
- make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
- EEMBC
- 4354 Town Center Blvd. Suite 114-200
- El Dorado Hills, CA, 95762
- */
- /* File: core_main.c
- This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
- */
- #include "coremark.h"
/* Function: dec2str
	Format the three least significant decimal digits of a value as text.

	The input is first reduced modulo 1000, so only values below 1000 are
	represented exactly. The result is always three characters wide and is
	zero padded on the left (e.g. 7 -> "007").

	Returns:
	Pointer to a function-local static buffer. Every call overwrites the
	same buffer, so the text must be consumed before the next call.
*/
static char *dec2str(uint32_t val)
{
    static char digits[4];
    uint32_t rest = val % 1000;

    /* hundreds, tens, ones - most significant digit first */
    digits[0] = (char)('0' + rest / 100);
    rest %= 100;
    digits[1] = (char)('0' + rest / 10);
    digits[2] = (char)('0' + rest % 10);
    digits[3] = '\0';

    return digits;
}
- /* Function: iterate
- Run the benchmark for a specified number of iterations.
- Operation:
- For each type of benchmarked algorithm:
- a - Initialize the data block for the algorithm.
- b - Execute the algorithm N times.
- Returns:
- NULL.
- */
- static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0, (ee_u16)0x3340, (ee_u16)0x6a79, (ee_u16)0xe714, (ee_u16)0xe3c1};
- static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52, (ee_u16)0x1199, (ee_u16)0x5608, (ee_u16)0x1fd7, (ee_u16)0x0747};
- static ee_u16 state_known_crc[] = {(ee_u16)0x5e47, (ee_u16)0x39bf, (ee_u16)0xe5a4, (ee_u16)0x8e3a, (ee_u16)0x8d84};
- void* iterate(void* pres)
- {
- ee_u32 i;
- ee_u16 crc;
- core_results* res = (core_results*)pres;
- ee_u32 iterations = res->iterations;
- res->crc = 0;
- res->crclist = 0;
- res->crcmatrix = 0;
- res->crcstate = 0;
- for (i = 0; i < iterations; i++) {
- crc = core_bench_list(res, 1);
- res->crc = crcu16(crc, res->crc);
- crc = core_bench_list(res, -1);
- res->crc = crcu16(crc, res->crc);
- if (i == 0) {
- res->crclist = res->crc;
- }
- }
- return NULL;
- }
- #if (SEED_METHOD==SEED_ARG)
- ee_s32 get_seed_args(int i, int argc, char* argv[]);
- #define get_seed(x) (ee_s16)get_seed_args(x,argc,argv)
- #define get_seed_32(x) get_seed_args(x,argc,argv)
- #else /* via function or volatile */
- ee_s32 get_seed_32(int i);
- #define get_seed(x) (ee_s16)get_seed_32(x)
- #endif
- #if (MEM_METHOD==MEM_STATIC)
- ee_u8 static_memblk[TOTAL_DATA_SIZE];
- #endif
- char* mem_name[3] = {"Static", "Heap", "Stack"};
- /* Function: main
- Main entry routine for the benchmark.
- This function is responsible for the following steps:
- 1 - Initialize input seeds from a source that cannot be determined at compile time.
- 2 - Initialize memory block for use.
- 3 - Run and time the benchmark.
- 4 - Report results, testing the validity of the output if the seeds are known.
- 5 - Print the additional CoreMark/MHz, CSV and IPC summary lines found at the end of this function.
- Arguments:
- 1 - first seed : Any value
- 2 - second seed : Must be identical to first for iterations to be identical
- 3 - third seed : Any value, should be at least an order of magnitude less than the input size, but bigger than 32.
- 4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
- NOTE(review): in this file the value read for argument 4 is always replaced afterwards (by 4 or 6 when CFG_SIMULATION is defined, by ITERATIONS otherwise), so the automatic path is only reached when ITERATIONS is 0.
- Returns:
- MAIN_RETURN_VAL, both on the early size-check exit and at the end of the run.
- */
- #if MAIN_HAS_NOARGC
- MAIN_RETURN_TYPE main(void)
- {
- int argc = 0; /* no command line on this target: the code below sees zero arguments */
- char* argv[1];
- #else
- MAIN_RETURN_TYPE main(int argc, char* argv[])
- {
- #endif
- ee_u16 i, j = 0, num_algorithms = 0; /* j counts the enabled algorithms that already received a memory slice */
- ee_s16 known_id = -1, total_errors = 0; /* known_id stays -1 unless seedcrc matches one of the known configurations */
- ee_u16 seedcrc = 0;
- CORE_TICKS total_time, total_instret;
- core_results results[MULTITHREAD]; /* one record per context; results[0] also holds the shared run parameters */
- #if (MEM_METHOD==MEM_STACK)
- ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
- #endif
- /* first call any initializations needed */
- portable_init(&(results[0].port), &argc, argv);
- /* First some checks to make sure benchmark will run ok */
- if (sizeof(struct list_head_s) > 128) {
- ee_printf("list_head structure too big for comparable data!\r\n");
- return MAIN_RETURN_VAL;
- }
- results[0].seed1 = get_seed(1);
- results[0].seed2 = get_seed(2);
- results[0].seed3 = get_seed(3);
- results[0].iterations = get_seed_32(4);
- #if CORE_DEBUG
- results[0].iterations = 1;
- #endif
- #ifdef CFG_SIMULATION /* either branch of this conditional replaces the iteration count chosen above */
- // 2024.1.3: 6 iterations are enough for rtl simulation (4 when CPU_SERIES is 100)
- #if defined(CPU_SERIES) && CPU_SERIES == 100
- results[0].iterations = 4;
- #else
- results[0].iterations = 6;
- #endif
- #else
- results[0].iterations = ITERATIONS; /* overrides both the seed-4 value and the CORE_DEBUG setting */
- #endif
- ee_printf("Start to run coremark for %u iterations\r\n", (unsigned int)results[0].iterations);
- results[0].execs = get_seed_32(5);
- if (results[0].execs == 0) { /* if not supplied, execute all algorithms */
- results[0].execs = ALL_ALGORITHMS_MASK;
- }
- /* put in some default values based on one seed only for easy testing */
- if ((results[0].seed1 == 0) && (results[0].seed2 == 0) && (results[0].seed3 == 0)) { /* validation run */
- results[0].seed1 = 0;
- results[0].seed2 = 0;
- results[0].seed3 = 0x66;
- }
- if ((results[0].seed1 == 1) && (results[0].seed2 == 0) && (results[0].seed3 == 0)) { /* performance run */
- results[0].seed1 = 0x3415;
- results[0].seed2 = 0x3415;
- results[0].seed3 = 0x66;
- }
- #if (MEM_METHOD==MEM_STATIC)
- results[0].memblock[0] = (void*)static_memblk;
- results[0].size = TOTAL_DATA_SIZE;
- results[0].err = 0;
- #if (MULTITHREAD>1)
- #error "Cannot use a static data area with multiple contexts!"
- #endif
- #elif (MEM_METHOD==MEM_MALLOC)
- for (i = 0 ; i < MULTITHREAD; i++) {
- ee_s32 malloc_override = get_seed(7); /* seed 7, when non-zero, replaces TOTAL_DATA_SIZE as the block size */
- if (malloc_override != 0) {
- results[i].size = malloc_override;
- } else {
- results[i].size = TOTAL_DATA_SIZE;
- }
- results[i].memblock[0] = portable_malloc(results[i].size); /* NOTE(review): the returned pointer is not checked before use */
- results[i].seed1 = results[0].seed1;
- results[i].seed2 = results[0].seed2;
- results[i].seed3 = results[0].seed3;
- results[i].err = 0;
- results[i].execs = results[0].execs;
- }
- #elif (MEM_METHOD==MEM_STACK)
- for (i = 0 ; i < MULTITHREAD; i++) {
- results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE; /* each context gets its own TOTAL_DATA_SIZE window of the stack buffer */
- results[i].size = TOTAL_DATA_SIZE;
- results[i].seed1 = results[0].seed1;
- results[i].seed2 = results[0].seed2;
- results[i].seed3 = results[0].seed3;
- results[i].err = 0;
- results[i].execs = results[0].execs;
- }
- #else
- #error "Please define a way to initialize a memory block."
- #endif
- /* Data init */
- /* Find out how much space we have based on number of algorithms:
- count the bits set in execs among the first NUM_ALGORITHMS positions,
- then split every context's block evenly between the enabled algorithms. */
- for (i = 0; i < NUM_ALGORITHMS; i++) {
- if ((1 << (ee_u32)i) & results[0].execs) {
- num_algorithms++;
- }
- }
- for (i = 0 ; i < MULTITHREAD; i++) {
- results[i].size = results[i].size / num_algorithms; /* NOTE(review): num_algorithms is still 0 here if execs selects none of the counted bits */
- }
- /* Assign pointers: enabled algorithm i gets memblock[i + 1], placed at the j-th slice of memblock[0] */
- for (i = 0; i < NUM_ALGORITHMS; i++) {
- ee_u32 ctx;
- if ((1 << (ee_u32)i) & results[0].execs) {
- for (ctx = 0 ; ctx < MULTITHREAD; ctx++) {
- results[ctx].memblock[i + 1] = (char*)(results[ctx].memblock[0]) + results[0].size * j; /* slice size is taken from results[0] for every context */
- }
- j++;
- }
- }
- /* call inits: set up the data of each enabled algorithm for every context, using the slice size of results[0] */
- for (i = 0 ; i < MULTITHREAD; i++) {
- if (results[i].execs & ID_LIST) {
- results[i].list = core_list_init(results[0].size, results[i].memblock[1], results[i].seed1);
- }
- if (results[i].execs & ID_MATRIX) {
- core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat)); /* seed argument: seed1 OR-ed with seed2 shifted left by 16 */
- }
- if (results[i].execs & ID_STATE) {
- core_init_state(results[0].size, results[i].seed1, results[i].memblock[3]);
- }
- }
- /* automatically determine number of iterations if not set */
- if (results[0].iterations == 0) {
- secs_ret secs_passed = 0;
- ee_u32 divisor;
- results[0].iterations = 1;
- while (secs_passed < (secs_ret)1) { /* multiply the count by 10 until one timed trial takes at least 1 second */
- results[0].iterations *= 10;
- start_time();
- iterate(&results[0]);
- stop_time();
- secs_passed = time_in_secs(get_time());
- }
- /* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
- divisor = (ee_u32)secs_passed;
- if (divisor == 0) { /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
- divisor = 1;
- }
- results[0].iterations *= 1 + 10 / divisor;
- }
- /* perform actual benchmark: everything between the start_* and stop_* calls is what gets measured */
- start_time();
- start_instret();
- #if (MULTITHREAD>1)
- if (default_num_contexts > MULTITHREAD) { /* never start more contexts than there are result records */
- default_num_contexts = MULTITHREAD;
- }
- for (i = 0 ; i < default_num_contexts; i++) {
- results[i].iterations = results[0].iterations;
- results[i].execs = results[0].execs;
- core_start_parallel(&results[i]);
- }
- for (i = 0 ; i < default_num_contexts; i++) {
- core_stop_parallel(&results[i]);
- }
- #else
- iterate(&results[0]);
- #endif
- stop_time();
- stop_instret();
- total_time = get_time();
- total_instret = get_instret();
- /* get a function of the input to report: a crc over the three seeds and the per-algorithm size */
- seedcrc = crc16(results[0].seed1, seedcrc);
- seedcrc = crc16(results[0].seed2, seedcrc);
- seedcrc = crc16(results[0].seed3, seedcrc);
- seedcrc = crc16(results[0].size, seedcrc);
- switch (seedcrc) { /* test known output for common seeds */
- case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
- known_id = 0;
- ee_printf("6k performance run parameters for coremark.\n");
- break;
- case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */
- known_id = 1;
- ee_printf("6k validation run parameters for coremark.\n");
- break;
- case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */
- known_id = 2;
- ee_printf("Profile generation run parameters for coremark.\n");
- break;
- case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
- known_id = 3;
- ee_printf("2K performance run parameters for coremark.\n");
- break;
- case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */
- known_id = 4;
- ee_printf("2K validation run parameters for coremark.\n");
- break;
- default:
- total_errors = -1; /* unknown seed/size combination: reported further down as "Cannot validate operation" */
- break;
- }
- if (known_id >= 0) { /* compare each context's crcs with the reference tables, for the enabled algorithms only */
- for (i = 0 ; i < default_num_contexts; i++) {
- results[i].err = 0;
- if ((results[i].execs & ID_LIST) &&
- (results[i].crclist != list_known_crc[known_id])) {
- ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n", i, results[i].crclist, list_known_crc[known_id]);
- results[i].err++;
- }
- if ((results[i].execs & ID_MATRIX) &&
- (results[i].crcmatrix != matrix_known_crc[known_id])) {
- ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n", i, results[i].crcmatrix, matrix_known_crc[known_id]);
- results[i].err++;
- }
- if ((results[i].execs & ID_STATE) &&
- (results[i].crcstate != state_known_crc[known_id])) {
- ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n", i, results[i].crcstate, state_known_crc[known_id]);
- results[i].err++;
- }
- total_errors += results[i].err;
- }
- }
- total_errors += check_data_types(); /* the value returned by check_data_types() is added to the error count */
- /* and report results */
- ee_printf("CoreMark Size : %u\n", (unsigned int)results[0].size);
- ee_printf("Total ticks : %u\n", (unsigned int)total_time);
- #if HAS_FLOAT
- ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
- if (time_in_secs(total_time) > 0) {
- ee_printf("Iterations/Sec : %f\n", default_num_contexts * results[0].iterations / time_in_secs(total_time));
- }
- #else
- ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
- if (time_in_secs(total_time) > 0) {
- ee_printf("Iterations/Sec : %d\n", default_num_contexts * results[0].iterations / time_in_secs(total_time));
- }
- #endif
- #ifdef CFG_SIMULATION
- // Bob: for simulation the minimum 10 second run-time check below is compiled out
- #else
- if (time_in_secs(total_time) < 10) {
- ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n");
- total_errors++;
- }
- #endif
- ee_printf("Iterations : %u\n", (unsigned int)(default_num_contexts * results[0].iterations));
- ee_printf("Compiler version : %s\n", COMPILER_VERSION);
- ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
- #if (MULTITHREAD>1)
- ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
- #endif
- ee_printf("Memory location : %s\n", MEM_LOCATION);
- /* output for verification */
- ee_printf("seedcrc : 0x%04x\n", seedcrc);
- if (results[0].execs & ID_LIST)
- for (i = 0 ; i < default_num_contexts; i++) {
- ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
- }
- if (results[0].execs & ID_MATRIX)
- for (i = 0 ; i < default_num_contexts; i++) {
- ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
- }
- if (results[0].execs & ID_STATE)
- for (i = 0 ; i < default_num_contexts; i++) {
- ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
- }
- for (i = 0 ; i < default_num_contexts; i++) {
- ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
- }
- if (total_errors == 0) {
- ee_printf("Correct operation validated. See readme.txt for run and reporting rules.\n");
- #if HAS_FLOAT
- if (known_id == 3) { /* the one-line score is printed only for the configuration with known_id 3 */
- ee_printf("CoreMark 1.0 : %f / %s %s", default_num_contexts * results[0].iterations / time_in_secs(total_time), COMPILER_VERSION, COMPILER_FLAGS);
- #if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
- ee_printf(" / %s", MEM_LOCATION);
- #else
- ee_printf(" / %s", mem_name[MEM_METHOD]);
- #endif
- #if (MULTITHREAD>1)
- ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
- #endif
- ee_printf("\n");
- }
- #endif
- }
- if (total_errors > 0) {
- ee_printf("Errors detected\n");
- }
- if (total_errors < 0) {
- ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n");
- }
- #if (MEM_METHOD==MEM_MALLOC)
- for (i = 0 ; i < MULTITHREAD; i++) {
- portable_free(results[i].memblock[0]);
- }
- #endif
- /* And last call any target specific code for finalizing */
- portable_fini(&(results[0].port));
- float coremark_dmips = ((uint64_t)results[0].iterations * 1000000) / (float)total_time; /* iterations per one million ticks; printed below as CoreMark/MHz */
- if ((total_time >> 32) & 0xFFFFFFFF) { /* bits above the low 32 are set, while the reports below print total_time through an (unsigned int) cast */
- printf("WARNING: Total ticks higher 32bit has value, please take care, higher 32bit 0x%x, lower 32bit 0x%x\n", \
- (unsigned int)(total_time >> 32), (unsigned int)total_time);
- }
- #if HAS_FLOAT
- ee_printf("\n");
- ee_printf("\n");
- ee_printf("Print Personal Added Addtional Info to Easy Visual Analysis\n");
- ee_printf("\n");
- ee_printf(" (Iterations is: %u\n", (unsigned int)results[0].iterations);
- ee_printf(" (total_ticks is: %u\n", (unsigned int)total_time);
- ee_printf(" (*) Assume the core running at 1 MHz\n");
- ee_printf(" So the CoreMark/MHz can be calculated by: \n");
- ee_printf(" (Iterations*1000000/total_ticks) = %2.6f CoreMark/MHz\n", coremark_dmips);
- ee_printf("\n");
- #endif
- uint32_t cmk_dmips = (uint32_t)(coremark_dmips * 1000); /* scaled by 1000 so the value can be printed as integer part plus three decimals without %f */
- char *pstr = dec2str(cmk_dmips); /* text of the three lowest decimal digits, i.e. the fractional part */
- ee_printf("\nCSV, Benchmark, Iterations, Cycles, CoreMark/MHz\n");
- ee_printf("CSV, CoreMark, %u, %u, %u.%s\n", \
- (unsigned int)results[0].iterations, (unsigned int)total_time, (unsigned int)(cmk_dmips/1000), pstr);
- float f_ipc = (((float)total_instret / total_time)); /* value of get_instret() divided by the measured ticks */
- uint32_t i_ipc = (uint32_t)(f_ipc * 1000); /* same fixed-point trick as for cmk_dmips above */
- pstr = dec2str(i_ipc); /* dec2str reuses one static buffer; the earlier text has already been printed */
- ee_printf("IPC = Instret/Cycle = %u/%u = %u.%s\n", (unsigned int)total_instret, (unsigned int)total_time, (unsigned int)(i_ipc/1000), pstr);
- return MAIN_RETURN_VAL;
- }
|