Nuclei-Software
/
nuclei-sdk
зеркало из https://github-proxy.rt-thread.io/Nuclei-Software/nuclei-sdk.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
							/*
 * Compile Linux Intel
 *  cc  whets.c cpuidc.c -lm -lrt -O3 -o whetstoneIL
 *
 * Cross Compile on Linux Intel for ARM
 * ~/toolchain/raspbian-toolchain-gcc-4.7.2-linux32/bin/arm-linux-gnueabihf-gcc
 whets.c cpuidc.c -lm -lrt -O3 -march=armv6 -mfloat-abi=hard -mfpu=vfp -o
 whetstonePiA6
 *
 * Compile on Raspberry Pi
 * gcc  whets.c cpuidc.c -lm -lrt -O3 -march=armv6 -mfloat-abi=hard -mfpu=vfp -o
 whetstonePiA6
 *
 *************************************************************************
 *
 *  Document:         Whets.c
 *  File Group:       Classic Benchmarks
 *  Creation Date:    6 November 1996
 *  Revision Date:    6 November 2010 Ubuntu Version for PCs
 *
 *  Title:            Whetstone Benchmark in C/C++
 *  Keywords:         WHETSTONE BENCHMARK PERFORMANCE MIPS
 *                    MWIPS MFLOPS
 *
 *  Abstract:         C or C++ version of Whetstone one of the
 *                    Classic Numeric Benchmarks with example
 *                    results on P3 to P6 based PCs.
 *
 *  Contributor:      roy@roylongbottom.org.uk
 *
 ************************************************************
 *
 *     C/C++ Whetstone Benchmark Single or Double Precision
 *
 *     Original concept        Brian Wichmann NPL      1960's
 *     Original author         Harold Curnow  CCTA     1972
 *     Self timing versions    Roy Longbottom CCTA     1978/87
 *     Optimisation control    Bangor University       1987/90
 *     C/C++ Version           Roy Longbottom          1996
 *     Compatibility & timers  Al Aburto               1996
 *
 ************************************************************
 *
 *              Official version approved by:
 *
 *         Harold Curnow  100421.1615@compuserve.com
 *
 *      Happy 25th birthday Whetstone, 21 November 1997
 *
 ************************************************************
 *
 *     The program normally runs for about 100 seconds
 *     (adjustable in main - variable duration). This time
 *     is necessary because of poor PC clock resolution.
 *     The original concept included such things as a given
 *     number of subroutine calls and divides which may be
 *     changed by optimisation. For comparison purposes the
 *     compiler and level of optimisation should be identified.
 *
 *     This version is set to run for 10 seconds using high
 *     resolution timer.
 *
 ************************************************************
 *
 *     The original benchmark had a single variable I which
 *     controlled the running time. Constants with values up
 *     to 899 were multiplied by I to control the number
 *     passes for each loop. It was found that large values
 *     of I could overflow index registers so an extra outer
 *     loop with a second variable J was added.
 *
 *     Self timing versions were produced during the early
 *     days. The 1978 changes supplied timings of individual
 *     loops and these were used later to produce MFLOPS and
 *     MOPS ratings.
 *
 *     1987 changes converted the benchmark to Fortran 77
 *     standards and removed redundant IF statements and
 *     loops to leave the 8 active loops N1 to N8. Procedure
 *     P3 was changed to use global variables to avoid over-
 *     optimisation with the first two statements changed from
 *     X1=X and Y1=Y to X=Y and Y=Z. A self time calibrating
 *     version for PCs was also produced, the facility being
 *     incorporated in this version.
 *
 *     This version has changes to avoid worse than expected
 *     speed ratings, due to underflow, and facilities to show
 *     that consistent numeric output is produced with varying
 *     optimisation levels or versions in different languages.
 *
 *     Some of the procedures produce ever decreasing numbers.
 *     To avoid problems, variables T and T1 have been changed
 *     from 0.499975 and 0.50025 to 0.49999975 and 0.50000025.
 *
 *     Each section now has its own double loop. Inner loops
 *     are run 100 times the loop constants. Calibration
 *     determines the number of outer loop passes. The
 *     numeric results produced in the main output are for
 *     one pass on the outer loop. As underflow problems were
 *     still likely on a processor 100 times faster than a 100
 *     MHz Pentium, three sections have T=1.0-T inserted in the
 *     outer loop to avoid the problem. The two loops avoid
 *     index register overflows.
 *
 *     The first section is run ten times longer than required
 *     for accuracy in calculating MFLOPS. This time is divided
 *     by ten for inclusion in the MWIPS calculations.
 *
 *     Early version has facilities for typing in details of
 *     the particular run, appended to file whets.txt along
 *     with the results. This version attemps to obtain these
 *     automatically.
 *
 *     2010 Section 4 modified slightly to avoid over optimisation
 *     by GCC compiler
 *
 *     Roy Longbottom  roy@roylongbottom.org.uk
 *
 ************************************************************
 *
 *     Whetstone benchmark results, further details of the
 *     benchmarks and history are available from:
 *
 *     http://www.roylongbottom.org.uk/whetstone%20results.htm
 *     http://www.roylongbottom.org.uk/whetstone.htm
 *
 ************************************************************
 *
 *     Source code is available in C/C++, Fortran, Basic and
 *     Visual Basic in the same format as this version. Pre-
 *     compiled versions for PCs are also available via C++.
 *     These comprise optimised and non-optimised versions
 *     for DOS, Windows and NT. See:
 *
 *     http://www.roylongbottom.org.uk/whetstone%20results.htm
 *
 ************************************************************
 *
 * Example of initial calibration display (Pentium 100 MHz)
 *
 * Single Precision C/C++ Whetstone Benchmark
 *
 * Calibrate
 *      0.17 Seconds          1   Passes (x 100)
 *      0.77 Seconds          5   Passes (x 100)
 *      3.70 Seconds         25   Passes (x 100)
 *
 * Use 676  passes (x 100)
 *
 * 676 passes are used for an approximate duration of 100
 * seconds, providing an initial estimate of a speed rating
 * of 67.6 MWIPS.
 *
 * This is followed by the table of results as below.

 * Whetstone Single  Precision Benchmark in C/C++
 *
 * Loop content                 Result            MFLOPS     MOPS   Seconds
 *
 * N1 floating point    -1.12475025653839100      19.971              0.274
 * N2 floating point    -1.12274754047393800      11.822              3.240
 * N3 if then else       1.00000000000000000               11.659     2.530
 * N4 fixed point       12.00000000000000000               13.962     6.430
 * N5 sin,cos etc.       0.49904659390449520                2.097    11.310
 * N6 floating point     0.99999988079071040       3.360             45.750
 * N7 assignments        3.00000000000000000                2.415    21.810
 * N8 exp,sqrt etc.      0.75110864639282230                1.206     8.790
 *
 * MWIPS                                          28.462            100.134
 *
 *  Note different numeric results to single precision. Slight variations
 *  are normal with different compilers and sometimes optimisation levels.
 *
 **************************************************************************
 */

#include <math.h>       /* for sin, exp etc.           */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// zcc report error stdatomic.h:201:17: error: unknown type name 'int_least8_t'; did you mean '__int_least8_t'?
//#include "stdatomic.h"
#include "nuclei_sdk_soc.h"

#include "config.h"
#include "cpuidh.h"

/*PRECISION PRECISION PRECISION PRECISION PRECISION PRECISION PRECISION*/

#define Version "Roy Longbottom Version"

void whetstones(long xtra, long x100, int calibrate);
void pa(SPDP e[4], SPDP t, SPDP t2);
void po(SPDP e1[4], long j, long k, long l);
void p3(SPDP* x, SPDP* y, SPDP* z, SPDP t, SPDP t1, SPDP t2);
void pout(char *title, float ops, int type, SPDP checknum, SPDP time,
          int calibrate, int section);

static SPDP loop_time[9];
static SPDP loop_mops[9];
static SPDP loop_mflops[9];
static SPDP TimeUsed;
static SPDP mwips, mwips_mhz;
static char headings[9][22];
static SPDP Check;
static SPDP results[9];

static uint64_t start_cycle, end_cycle, used_cycle;
static uint64_t start_instret, end_instret, used_instret;

/* Only support dec number < 1000 */
static char *dec2str(uint32_t val)
{
    static char str[4];
    val = val % 1000;
    int decnum = 100;
    for (int i = 0; i < 3; i ++) {
        str[i] = val / decnum + '0';
        val = val % decnum;
        decnum = decnum / 10;
    }
    str[3] = '\0';
    return str;
}

int main(void)
{
    int count = 10, calibrate = 1;
    long xtra = 1;

#if defined(CPU_SERIES) && CPU_SERIES == 100
    long x100 = 10;
#else
    long x100 = 100;

    //NOTE: when no fpu present, use less passes
#ifndef __riscv_flen
    x100 = x100 >> 2;
#endif
#endif

#if CFG_SIMULATION
    int duration = 1;
#else
    int duration = 3;
#endif

    printf("\n");
#if defined(CPU_SERIES) && CPU_SERIES < 300
    printf("100 and 200 series CPU have no FPU, running Whetstone is meaningless for these CPU.\n");
#endif
    printf("##########################################\n");
    printf("%s Precision C Whetstone Benchmark %s \n", Precision, Version);

    printf("Calibrate\n");
    do {
        TimeUsed = 0;

        whetstones(xtra, x100, calibrate);
        printf("%11.2f Seconds %10.0lf   Passes (x %d)\n", TimeUsed,
               (SPDP)(xtra), x100);
        calibrate++;
        count--;

#if CFG_SIMULATION
        if (TimeUsed > 0.02)
#else
#if defined(CPU_SERIES) && CPU_SERIES == 100
        if (TimeUsed > 0.1)
#else
        if (TimeUsed > 0.2)
#endif
#endif
        {
            count = 0;
        } else {
            xtra = xtra * 5;
        }
    } while (count > 0);

    if (TimeUsed > 0) {
        xtra = (long)((SPDP)(duration * xtra) / TimeUsed);
    }
    if (xtra < 1) {
        xtra = 1;
    }

    calibrate = 0;

    printf("\nUse %u  passes (x %d)\n", (uint32_t)xtra, x100);
    printf("\n          %s Precision C/C++ Whetstone Benchmark", Precision);

#ifdef PRECOMP
    printf("\n          Compiler  %s", precompiler);
    printf("\n          Options   %s\n", preoptions);
#else
    printf("\n");
#endif

    printf("\nLoop content                  Result              MFLOPS "
           "     MOPS   Seconds\n\n");

    // reset instret and cycle
    __set_rv_cycle(0);
    __set_rv_instret(0);
    start_cycle = __get_rv_cycle();
    start_instret = __get_rv_instret();

    TimeUsed = 0;
    whetstones(xtra, x100, calibrate);

    end_cycle = __get_rv_cycle();
    end_instret = __get_rv_instret();
    used_cycle = end_cycle - start_cycle;
    used_instret = end_instret - start_instret;

    printf("\nMWIPS            ");
    if (TimeUsed > 0) {
        mwips = (float)(xtra) * (float)(x100) / (10 * TimeUsed);
    } else {
        mwips = 0;
    }

    printf("%39.3f%19.3f\n\n", mwips, TimeUsed);

    printf("\nMWIPS/MHz        ");

    mwips_mhz = mwips / SystemCoreClock * 1000000;
    printf("%39.3f%19.3f\n\n", mwips_mhz, TimeUsed);

    uint32_t whet_mwips = (uint32_t)(mwips_mhz * 1000);
    char *pstr = dec2str(whet_mwips);
    printf("\nCSV, Benchmark, MWIPS/MHz\n");
    printf("CSV, Whetstone, %u.%s\n", (unsigned int)(whet_mwips/1000), pstr);

    float f_ipc = (((float)used_instret / used_cycle));
    uint32_t i_ipc = (uint32_t)(f_ipc * 1000);
    pstr = dec2str(i_ipc);

    printf("IPC = Instret/Cycle = %u/%u = %u.%s\n", (unsigned int)used_instret, (unsigned int)used_cycle, (unsigned int)(i_ipc/1000), pstr);

    if (Check == 0) {
        printf("Wrong answer  \n");
        return -1;
    }

    return 0;
}

void whetstones(long xtra, long x100, int calibrate)
{

    long n1, n2, n3, n4, n5, n6, n7, n8, i, ix, n1mult;
    SPDP x, y, z;
    long j, k, l;
    SPDP e1[4];

    SPDP t = 0.49999975;
    SPDP t0 = t;
    SPDP t1 = 0.50000025;
    SPDP t2 = 2.0;

    Check = 0.0;

    n1 = 12 * x100;
    n2 = 14 * x100;
    n3 = 345 * x100;
    n4 = 210 * x100;
    n5 = 32 * x100;
    n6 = 899 * x100;
    n7 = 616 * x100;
    n8 = 93 * x100;
    n1mult = 10;

    /* Section 1, Array elements */

    e1[0] = 1.0;
    e1[1] = -1.0;
    e1[2] = -1.0;
    e1[3] = -1.0;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n1 * n1mult; i++) {
                e1[0] = (e1[0] + e1[1] + e1[2] - e1[3]) * t;
                e1[1] = (e1[0] + e1[1] - e1[2] + e1[3]) * t;
                e1[2] = (e1[0] - e1[1] + e1[2] + e1[3]) * t;
                e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3]) * t;
            }
            t = 1.0 - t;
        }
        t = t0;
    }
    end_time();
    secs = secs / (SPDP)(n1mult);
    pout("N1 floating point\0", (float)(n1 * 16) * (float)(xtra), 1, e1[3],
         secs, calibrate, 1);

    /* Section 2, Array as parameter */

    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n2; i++) {
                pa(e1, t, t2);
            }
            t = 1.0 - t;
        }
        t = t0;
    }
    end_time();
    pout("N2 floating point\0", (float)(n2 * 96) * (float)(xtra), 1, e1[3],
         secs, calibrate, 2);

    /* Section 3, Conditional jumps */
    j = 1;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n3; i++) {
                if (j == 1) {
                    j = 2;
                } else {
                    j = 3;
                }
                if (j > 2) {
                    j = 0;
                } else {
                    j = 1;
                }
                if (j < 1) {
                    j = 1;
                } else {
                    j = 0;
                }
            }
        }
    }
    end_time();
    pout("N3 if then else  \0", (float)(n3 * 3) * (float)(xtra), 2, (SPDP)(j),
         secs, calibrate, 3);

    /* Section 4, Integer arithmetic */
    j = 1;
    k = 2;
    l = 3;
    e1[0] = 0.0;
    e1[1] = 0.0;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n4; i++) {
                j = j * (k - j) * (l - k);
                k = l * k - (l - j) * k;
                l = (l - k) * (k + j);
                e1[l - 2] = e1[l - 2] + j + k + l;
                e1[k - 2] = e1[k - 2] + j * k * l;
                //  was          e1[l-2] = j + k + l; and  e1[k-2] = j * k * l;
            }
        }
    }
    end_time();
    x = (e1[0] + e1[1]) / (SPDP)n4 / (SPDP)xtra; // was x = e1[0]+e1[1];
    pout("N4 fixed point   \0", (float)(n4 * 15) * (float)(xtra), 2, x, secs,
         calibrate, 4);

    /* Section 5, Trig functions */
    x = 0.5;
    y = 0.5;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 1; i < n5; i++) {
                x = t * atan(t2 * sin(x) * cos(x) /
                             (cos(x + y) + cos(x - y) - 1.0));
                y = t * atan(t2 * sin(y) * cos(y) /
                             (cos(x + y) + cos(x - y) - 1.0));
            }
            t = 1.0 - t;
        }
        t = t0;
    }
    end_time();
    pout("N5 sin,cos etc.  \0", (float)(n5 * 26) * (float)(xtra), 2, y, secs,
         calibrate, 5);

    /* Section 6, Procedure calls */
    x = 1.0;

    y = 1.0;
    z = 1.0;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n6; i++) {
                p3(&x, &y, &z, t, t1, t2);
            }
        }
    }
    end_time();
    pout("N6 floating point\0", (float)(n6 * 6) * (float)(xtra), 1, z, secs,
         calibrate, 6);

    /* Section 7, Array refrences */
    j = 0;
    k = 1;
    l = 2;
    e1[0] = 1.0;
    e1[1] = 2.0;
    e1[2] = 3.0;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n7; i++) {
                po(e1, j, k, l);
            }
        }
    }
    end_time();
    pout("N7 assignments   \0", (float)(n7 * 3) * (float)(xtra), 2, e1[2], secs,
         calibrate, 7);

    /* Section 8, Standard functions */
    x = 0.75;
    start_time();
    {
        for (ix = 0; ix < xtra; ix++) {
            for (i = 0; i < n8; i++) {
                x = sqrt(exp(log(x) / t1));
            }
        }
    }
    end_time();
    pout("N8 exp,sqrt etc. \0", (float)(n8 * 4) * (float)(xtra), 2, x, secs,
         calibrate, 8);

    return;
}

void pa(SPDP e[4], SPDP t, SPDP t2)
{
    long j;
    for (j = 0; j < 6; j++) {
        e[0] = (e[0] + e[1] + e[2] - e[3]) * t;
        e[1] = (e[0] + e[1] - e[2] + e[3]) * t;
        e[2] = (e[0] - e[1] + e[2] + e[3]) * t;
        e[3] = (-e[0] + e[1] + e[2] + e[3]) / t2;
    }

    return;
}

void po(SPDP e1[4], long j, long k, long l)
{
    e1[j] = e1[k];
    e1[k] = e1[l];
    e1[l] = e1[j];
    return;
}

void p3(SPDP* x, SPDP* y, SPDP* z, SPDP t, SPDP t1, SPDP t2)
{
    *x = *y;
    *y = *z;
    *x = t * (*x + *y);
    *y = t1 * (*x + *y);
    *z = (*x + *y) / t2;
    return;
}

void pout(char *title, float ops, int type, SPDP checknum, SPDP time,
          int calibrate, int section)
{
    SPDP mops, mflops;

    Check = Check + checknum;
    loop_time[section] = time;
    strcpy(headings[section], title);
    TimeUsed = TimeUsed + time;
    if (calibrate == 1)

    {
        results[section] = checknum;
    }
    if (calibrate == 0) {
        printf("%s %20.17f    ", headings[section], results[section]);

        if (type == 1) {
            if (time > 0) {
                mflops = ops / (1000000L * time);
            } else {
                mflops = 0;
            }
            loop_mops[section] = 99999;
            loop_mflops[section] = mflops;
            printf(" %9.3f          %9.3f\n", loop_mflops[section],
                   loop_time[section]);
        } else {
            if (time > 0) {
                mops = ops / (1000000L * time);
            } else {
                mops = 0;
            }
            loop_mops[section] = mops;
            loop_mflops[section] = 0;
            printf("           %9.3f%9.3f\n", loop_mops[section],
                   loop_time[section]);
        }
    }

    return;
}