/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* **************************************
 *  Tuning parameters
 ****************************************/
#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
#    define BMK_TIMETEST_DEFAULT_S 3
#endif

/* *************************************
 *  Includes
 ***************************************/
/* this must be included first */
#include "platform.h" /* Large Files support, compiler specifics */

/* then following system includes */
#include <assert.h> /* assert */
#include <errno.h>
#include <stdio.h>    /* fprintf, fopen */
#include <stdlib.h>   /* malloc, free */
#include <string.h>   /* memset, strerror */
#include "util.h"     /* UTIL_getFileSize, UTIL_sleep */
#include "../lib/common/mem.h"
#include "benchfn.h"
#include "timefn.h" /* UTIL_time_t */
#ifndef ZSTD_STATIC_LINKING_ONLY
#    define ZSTD_STATIC_LINKING_ONLY
#endif
#include "../lib/zstd.h"
#include "datagen.h" /* RDG_genBuffer */
#include "lorem.h"   /* LOREM_genBuffer */
#ifndef XXH_INLINE_ALL
#    define XXH_INLINE_ALL
#endif
#include "../lib/common/xxhash.h"
#include "../lib/zstd_errors.h"
#include "benchzstd.h"

/* *************************************
 *  Constants
 ***************************************/
#ifndef ZSTD_GIT_COMMIT
#    define ZSTD_GIT_COMMIT_STRING ""
#else
#    define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif

#define TIMELOOP_MICROSEC (1 * 1000000ULL)             /* 1 second */
#define TIMELOOP_NANOSEC (1 * 1000000000ULL)           /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10

#define KB *(1 << 10)
#define MB *(1 << 20)
#define GB *(1U << 30)

#define BMK_RUNTEST_DEFAULT_MS 1000

static const size_t maxMemory = (sizeof(size_t) == 4)
        ?
        /* 32-bit */ (2 GB - 64 MB)
        :
        /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31));

/* *************************************
 *  console display
 ***************************************/
#define DISPLAY(...)                  \
    {                                 \
        fprintf(stderr, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define DISPLAYLEVEL(l, ...)  \
    if (displayLevel >= l) {  \
        DISPLAY(__VA_ARGS__); \
    }
/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : +
 * progression;   4 : + information */
#define OUTPUT(...)                   \
    {                                 \
        fprintf(stdout, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define OUTPUTLEVEL(l, ...)  \
    if (displayLevel >= l) { \
        OUTPUT(__VA_ARGS__); \
    }

/* *************************************
 *  Exceptions
 ***************************************/
#ifndef DEBUG
#    define DEBUG 0
#endif
#define DEBUGOUTPUT(...)          \
    {                             \
        if (DEBUG)                \
            DISPLAY(__VA_ARGS__); \
    }

#define RETURN_ERROR_INT(errorNum, ...)                \
    {                                                  \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        return errorNum;                               \
    }

#define CHECK_Z(zf)                                                  \
    {                                                                \
        size_t const zerr = zf;                                      \
        if (ZSTD_isError(zerr)) {                                    \
            DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);           \
            DISPLAY("Error : ");                                     \
            DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
            DISPLAY(" \n");                                          \
            exit(1);                                                 \
        }                                                            \
    }

#define RETURN_ERROR(errorNum, retType, ...)           \
    {                                                  \
        retType r;                                     \
        memset(&r, 0, sizeof(retType));                \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        r.tag = errorNum;                              \
        return r;                                      \
    }

/* replacement for snprintf(), which is not supported by C89
 * sprintf() would be the supported one, but it's labelled unsafe,
 * so some modern static analyzer will flag it as such, making it unusable.
 * formatString_u() replaces snprintf() for the specific case where there are only %u arguments */
static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
{
    size_t written = 0;
    int i;
    assert(value <= 100);

    for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) {
        if (formatString[i] != '%') {
            buffer[written++] = formatString[i];
            continue;
        }

        if (formatString[++i] == 'u') {
            /* Handle single digit */
            if (value < 10) {
                buffer[written++] = '0' + (char)value;
            } else if (value < 100) {
                /* Handle two digits */
                if (written >= buffer_size - 2) {
                    return -1; /* buffer overflow */
                }
                buffer[written++] = '0' + (char)(value / 10);
                buffer[written++] = '0' + (char)(value % 10);
            } else { /* 100 */
                if (written >= buffer_size - 3) {
                    return -1; /* buffer overflow */
                }
                buffer[written++] = '1';
                buffer[written++] = '0';
                buffer[written++] = '0';
            }
        } else if (formatString[i] == '%') { /* Check for escaped percent sign */
            buffer[written++] = '%';
        } else {
            return -1; /* unsupported format */
        }
    }

    if (written < buffer_size) {
        buffer[written] = '\0';
    } else {
        buffer[0] = '\0'; /* Handle truncation */
    }

    return (int)written;
}

/* *************************************
 *  Benchmark Parameters
 ***************************************/

BMK_advancedParams_t BMK_initAdvancedParams(void)
{
    BMK_advancedParams_t const res = {
        BMK_both,               /* mode */
        BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
        0,                      /* blockSize */
        0,               /* targetCBlockSize */
        0,                      /* nbWorkers */
        0,                      /* realTime */
        0,                      /* additionalParam */
        0,                      /* ldmFlag */
        0,                      /* ldmMinMatch */
        0,                      /* ldmHashLog */
        0,                      /* ldmBuckSizeLog */
        0,                      /* ldmHashRateLog */
        ZSTD_ps_auto,           /* literalCompressionMode */
        0                       /* useRowMatchFinder */
    };
    return res;
}

/* ********************************************************
 *  Bench functions
 **********************************************************/
typedef struct {
    const void* srcPtr;
    size_t srcSize;
    void* cPtr;
    size_t cRoom;
    size_t cSize;
    void* resPtr;
    size_t resSize;
} blockParam_t;

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static void BMK_initCCtx(
        ZSTD_CCtx* ctx,
        const void* dictBuffer,
        size_t dictBufferSize,
        int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const BMK_advancedParams_t* adv)
{
    ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
    if (adv->nbWorkers == 1) {
        CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
    } else {
        CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
    }
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx,
            ZSTD_c_literalCompressionMode,
            (int)adv->literalCompressionMode));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_strategy, (int)comprParams->strategy));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
    CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
}

static void
BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
{
    CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
}

typedef struct {
    ZSTD_CCtx* cctx;
    const void* dictBuffer;
    size_t dictBufferSize;
    int cLevel;
    const ZSTD_compressionParameters* comprParams;
    const BMK_advancedParams_t* adv;
} BMK_initCCtxArgs;

static size_t local_initCCtx(void* payload)
{
    BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
    BMK_initCCtx(
            ag->cctx,
            ag->dictBuffer,
            ag->dictBufferSize,
            ag->cLevel,
            ag->comprParams,
            ag->adv);
    return 0;
}

typedef struct {
    ZSTD_DCtx* dctx;
    const void* dictBuffer;
    size_t dictBufferSize;
} BMK_initDCtxArgs;

static size_t local_initDCtx(void* payload)
{
    BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
    BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
    return 0;
}

/* `addArgs` is the context */
static size_t local_defaultCompress(
        const void* srcBuffer,
        size_t srcSize,
        void* dstBuffer,
        size_t dstSize,
        void* addArgs)
{
    ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
    return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
}

/* `addArgs` is the context */
static size_t local_defaultDecompress(
        const void* srcBuffer,
        size_t srcSize,
        void* dstBuffer,
        size_t dstCapacity,
        void* addArgs)
{
    size_t moreToFlush    = 1;
    ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
    ZSTD_inBuffer in;
    ZSTD_outBuffer out;
    in.src   = srcBuffer;
    in.size  = srcSize;
    in.pos   = 0;
    out.dst  = dstBuffer;
    out.size = dstCapacity;
    out.pos  = 0;
    while (moreToFlush) {
        if (out.pos == out.size) {
            return (size_t)-ZSTD_error_dstSize_tooSmall;
        }
        moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
        if (ZSTD_isError(moreToFlush)) {
            return moreToFlush;
        }
    }
    return out.pos;
}

/* ================================================================= */
/*      Benchmark Zstandard, mem-to-mem scenarios                    */
/* ================================================================= */

int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
{
    return outcome.tag == 0;
}

BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
{
    assert(outcome.tag == 0);
    return outcome.internal_never_use_directly;
}

static BMK_benchOutcome_t BMK_benchOutcome_error(void)
{
    BMK_benchOutcome_t b;
    memset(&b, 0, sizeof(b));
    b.tag = 1;
    return b;
}

static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
        BMK_benchResult_t result)
{
    BMK_benchOutcome_t b;
    b.tag                         = 0;
    b.internal_never_use_directly = result;
    return b;
}

/* benchMem with no allocation */
static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
        const void** srcPtrs,
        size_t* srcSizes,
        void** cPtrs,
        size_t* cCapacities,
        size_t* cSizes,
        void** resPtrs,
        size_t* resSizes,
        void** resultBufferPtr,
        void* compressedBuffer,
        size_t maxCompressedSize,
        BMK_timedFnState_t* timeStateCompress,
        BMK_timedFnState_t* timeStateDecompress,

        const void* srcBuffer,
        size_t srcSize,
        const size_t* fileSizes,
        unsigned nbFiles,
        const int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const void* dictBuffer,
        size_t dictBufferSize,
        ZSTD_CCtx* cctx,
        ZSTD_DCtx* dctx,
        int displayLevel,
        const char* displayName,
        const BMK_advancedParams_t* adv)
{
    size_t const blockSize =
            ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
                     ? adv->blockSize
                     : srcSize)
            + (!srcSize); /* avoid div by 0 */
    BMK_benchResult_t benchResult;
    size_t const loadedCompressedSize = srcSize;
    size_t cSize                      = 0;
    double ratio                      = 0.;
    U32 nbBlocks;

    assert(cctx != NULL);
    assert(dctx != NULL);

    /* init */
    memset(&benchResult, 0, sizeof(benchResult));
    if (strlen(displayName) > 17)
        displayName +=
                strlen(displayName) - 17; /* display last 17 characters */
    if (adv->mode == BMK_decodeOnly) {
        /* benchmark only decompression : source must be already compressed */
        const char* srcPtr = (const char*)srcBuffer;
        U64 totalDSize64   = 0;
        U32 fileNb;
        for (fileNb = 0; fileNb < nbFiles; fileNb++) {
            U64 const fSize64 =
                    ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
            if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
                RETURN_ERROR(
                        32,
                        BMK_benchOutcome_t,
                        "Decompressed size cannot be determined: cannot benchmark");
            }
            if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
                RETURN_ERROR(
                        32,
                        BMK_benchOutcome_t,
                        "Error while trying to assess decompressed size: data may be invalid");
            }
            totalDSize64 += fSize64;
            srcPtr += fileSizes[fileNb];
        }
        {
            size_t const decodedSize = (size_t)totalDSize64;
            assert((U64)decodedSize == totalDSize64); /* check overflow */
            free(*resultBufferPtr);
            if (totalDSize64 > decodedSize) { /* size_t overflow */
                RETURN_ERROR(
                        32,
                        BMK_benchOutcome_t,
                        "decompressed size is too large for local system");
            }
            *resultBufferPtr = malloc(decodedSize);
            if (!(*resultBufferPtr)) {
                RETURN_ERROR(
                        33,
                        BMK_benchOutcome_t,
                        "allocation error: not enough memory");
            }
            cSize   = srcSize;
            srcSize = decodedSize;
            ratio   = (double)srcSize / (double)cSize;
        }
    }

    /* Init data blocks  */
    {
        const char* srcPtr = (const char*)srcBuffer;
        char* cPtr         = (char*)compressedBuffer;
        char* resPtr       = (char*)(*resultBufferPtr);
        U32 fileNb;
        for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) {
            size_t remaining              = fileSizes[fileNb];
            U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly)
                    ? 1
                    : (U32)((remaining + (blockSize - 1)) / blockSize);
            U32 const blockEnd            = nbBlocks + nbBlocksforThisFile;
            for (; nbBlocks < blockEnd; nbBlocks++) {
                size_t const thisBlockSize = MIN(remaining, blockSize);
                srcPtrs[nbBlocks]          = srcPtr;
                srcSizes[nbBlocks]         = thisBlockSize;
                cPtrs[nbBlocks]            = cPtr;
                cCapacities[nbBlocks]      = (adv->mode == BMK_decodeOnly)
                             ? thisBlockSize
                             : ZSTD_compressBound(thisBlockSize);
                resPtrs[nbBlocks]          = resPtr;
                resSizes[nbBlocks]         = (adv->mode == BMK_decodeOnly)
                                ? (size_t)ZSTD_findDecompressedSize(
                                srcPtr, thisBlockSize)
                                : thisBlockSize;
                srcPtr += thisBlockSize;
                cPtr += cCapacities[nbBlocks];
                resPtr += thisBlockSize;
                remaining -= thisBlockSize;
                if (adv->mode == BMK_decodeOnly) {
                    cSizes[nbBlocks]  = thisBlockSize;
                    benchResult.cSize = thisBlockSize;
                }
            }
        }
    }

    /* warming up `compressedBuffer` */
    if (adv->mode == BMK_decodeOnly) {
        memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
    } else {
        RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
    }

    if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
        OUTPUTLEVEL(
                2,
                "Warning : time measurements may be incorrect in multithreading mode... \n")
    }

    /* Bench */
    {
        U64 const crcOrig = (adv->mode == BMK_decodeOnly)
                ? 0
                : XXH64(srcBuffer, srcSize, 0);
#define NB_MARKS 4
        const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
        U32 markNb                  = 0;
        int compressionCompleted    = (adv->mode == BMK_decodeOnly);
        int decompressionCompleted  = (adv->mode == BMK_compressOnly);
        BMK_benchParams_t cbp, dbp;
        BMK_initCCtxArgs cctxprep;
        BMK_initDCtxArgs dctxprep;

        cbp.benchFn       = local_defaultCompress; /* ZSTD_compress2 */
        cbp.benchPayload  = cctx;
        cbp.initFn        = local_initCCtx; /* BMK_initCCtx */
        cbp.initPayload   = &cctxprep;
        cbp.errorFn       = ZSTD_isError;
        cbp.blockCount    = nbBlocks;
        cbp.srcBuffers    = srcPtrs;
        cbp.srcSizes      = srcSizes;
        cbp.dstBuffers    = cPtrs;
        cbp.dstCapacities = cCapacities;
        cbp.blockResults  = cSizes;

        cctxprep.cctx           = cctx;
        cctxprep.dictBuffer     = dictBuffer;
        cctxprep.dictBufferSize = dictBufferSize;
        cctxprep.cLevel         = cLevel;
        cctxprep.comprParams    = comprParams;
        cctxprep.adv            = adv;

        dbp.benchFn       = local_defaultDecompress;
        dbp.benchPayload  = dctx;
        dbp.initFn        = local_initDCtx;
        dbp.initPayload   = &dctxprep;
        dbp.errorFn       = ZSTD_isError;
        dbp.blockCount    = nbBlocks;
        dbp.srcBuffers    = (const void* const*)cPtrs;
        dbp.srcSizes      = cSizes;
        dbp.dstBuffers    = resPtrs;
        dbp.dstCapacities = resSizes;
        dbp.blockResults  = NULL;

        dctxprep.dctx           = dctx;
        dctxprep.dictBuffer     = dictBuffer;
        dctxprep.dictBufferSize = dictBufferSize;

        OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
        assert(srcSize < UINT_MAX);
        OUTPUTLEVEL(
                2,
                "%2s-%-17.17s :%10u -> \r",
                marks[markNb],
                displayName,
                (unsigned)srcSize);

        while (!(compressionCompleted && decompressionCompleted)) {
            if (!compressionCompleted) {
                BMK_runOutcome_t const cOutcome =
                        BMK_benchTimedFn(timeStateCompress, cbp);

                if (!BMK_isSuccessful_runOutcome(cOutcome)) {
                    RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
                }

                {
                    BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
                    cSize                       = cResult.sumOfReturn;
                    ratio = (double)srcSize / (double)cSize;
                    {
                        BMK_benchResult_t newResult;
                        newResult.cSpeed =
                                (U64)((double)srcSize * TIMELOOP_NANOSEC
                                      / cResult.nanoSecPerRun);
                        benchResult.cSize = cSize;
                        if (newResult.cSpeed > benchResult.cSpeed)
                            benchResult.cSpeed = newResult.cSpeed;
                    }
                }

                {
                    int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
                    assert(cSize < UINT_MAX);
                    OUTPUTLEVEL(
                            2,
                            "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
                            marks[markNb],
                            displayName,
                            (unsigned)srcSize,
                            (unsigned)cSize,
                            ratioAccuracy,
                            ratio,
                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
                            (double)benchResult.cSpeed / MB_UNIT);
                }
                compressionCompleted =
                        BMK_isCompleted_TimedFn(timeStateCompress);
            }

            if (!decompressionCompleted) {
                BMK_runOutcome_t const dOutcome =
                        BMK_benchTimedFn(timeStateDecompress, dbp);

                if (!BMK_isSuccessful_runOutcome(dOutcome)) {
                    RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
                }

                {
                    BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
                    U64 const newDSpeed =
                            (U64)((double)srcSize * TIMELOOP_NANOSEC
                                  / dResult.nanoSecPerRun);
                    if (newDSpeed > benchResult.dSpeed)
                        benchResult.dSpeed = newDSpeed;
                }

                {
                    int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
                    OUTPUTLEVEL(
                            2,
                            "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
                            marks[markNb],
                            displayName,
                            (unsigned)srcSize,
                            (unsigned)cSize,
                            ratioAccuracy,
                            ratio,
                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
                            (double)benchResult.cSpeed / MB_UNIT,
                            (double)benchResult.dSpeed / MB_UNIT);
                }
                decompressionCompleted =
                        BMK_isCompleted_TimedFn(timeStateDecompress);
            }
            markNb = (markNb + 1) % NB_MARKS;
        } /* while (!(compressionCompleted && decompressionCompleted)) */

        /* CRC Checking */
        {
            const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
            U64 const crcCheck       = XXH64(resultBuffer, srcSize, 0);
            if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
                size_t u;
                DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
                        displayName,
                        (unsigned)crcOrig,
                        (unsigned)crcCheck);
                for (u = 0; u < srcSize; u++) {
                    if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
                        unsigned segNb, bNb, pos;
                        size_t bacc = 0;
                        DISPLAY("Decoding error at pos %u ", (unsigned)u);
                        for (segNb = 0; segNb < nbBlocks; segNb++) {
                            if (bacc + srcSizes[segNb] > u)
                                break;
                            bacc += srcSizes[segNb];
                        }
                        pos = (U32)(u - bacc);
                        bNb = pos / (128 KB);
                        DISPLAY("(sample %u, block %u, pos %u) \n",
                                segNb,
                                bNb,
                                pos);
                        {
                            size_t const lowest = (u > 5) ? 5 : u;
                            size_t n;
                            DISPLAY("origin: ");
                            for (n = lowest; n > 0; n--)
                                DISPLAY("%02X ",
                                        ((const BYTE*)srcBuffer)[u - n]);
                            DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
                            for (n = 1; n < 3; n++)
                                DISPLAY("%02X ",
                                        ((const BYTE*)srcBuffer)[u + n]);
                            DISPLAY(" \n");
                            DISPLAY("decode: ");
                            for (n = lowest; n > 0; n--)
                                DISPLAY("%02X ", resultBuffer[u - n]);
                            DISPLAY(" :%02X:  ", resultBuffer[u]);
                            for (n = 1; n < 3; n++)
                                DISPLAY("%02X ", resultBuffer[u + n]);
                            DISPLAY(" \n");
                        }
                        break;
                    }
                    if (u == srcSize - 1) { /* should never happen */
                        DISPLAY("no difference detected\n");
                    }
                } /* for (u=0; u<srcSize; u++) */
            }     /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
        }         /* CRC Checking */

        if (displayLevel
            == 1) { /* hidden display mode -q, used by python speed benchmark */
            double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
            double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
            if (adv->additionalParam) {
                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n",
                       cLevel,
                       (int)cSize,
                       ratio,
                       cSpeed,
                       dSpeed,
                       displayName,
                       adv->additionalParam);
            } else {
                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n",
                       cLevel,
                       (int)cSize,
                       ratio,
                       cSpeed,
                       dSpeed,
                       displayName);
            }
        }

        OUTPUTLEVEL(2, "%2i#\n", cLevel);
    } /* Bench */

    benchResult.cMem =
            (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
    return BMK_benchOutcome_setValidResult(benchResult);
}

BMK_benchOutcome_t BMK_benchMemAdvanced(
        const void* srcBuffer,
        size_t srcSize,
        void* dstBuffer,
        size_t dstCapacity,
        const size_t* fileSizes,
        unsigned nbFiles,
        int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const void* dictBuffer,
        size_t dictBufferSize,
        int displayLevel,
        const char* displayName,
        const BMK_advancedParams_t* adv)

{
    int const dstParamsError =
            !dstBuffer ^ !dstCapacity; /* must be both NULL or none */

    size_t const blockSize =
            ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
                     ? adv->blockSize
                     : srcSize)
            + (!srcSize) /* avoid div by 0 */;
    U32 const maxNbBlocks =
            (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;

    /* these are the blockTable parameters, just split up */
    const void** const srcPtrs =
            (const void**)malloc(maxNbBlocks * sizeof(void*));
    size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));

    void** const cPtrs        = (void**)malloc(maxNbBlocks * sizeof(void*));
    size_t* const cSizes      = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
    size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));

    void** const resPtrs   = (void**)malloc(maxNbBlocks * sizeof(void*));
    size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));

    BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
            adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
    BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
            adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();

    const size_t maxCompressedSize = dstCapacity
            ? dstCapacity
            : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);

    void* const internalDstBuffer =
            dstBuffer ? NULL : malloc(maxCompressedSize);
    void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;

    BMK_benchOutcome_t outcome =
            BMK_benchOutcome_error(); /* error by default */

    void* resultBuffer = srcSize ? malloc(srcSize) : NULL;

    int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
            || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
            || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
            || !resultBuffer;

    if (!allocationincomplete && !dstParamsError) {
        outcome = BMK_benchMemAdvancedNoAlloc(
                srcPtrs,
                srcSizes,
                cPtrs,
                cCapacities,
                cSizes,
                resPtrs,
                resSizes,
                &resultBuffer,
                compressedBuffer,
                maxCompressedSize,
                timeStateCompress,
                timeStateDecompress,
                srcBuffer,
                srcSize,
                fileSizes,
                nbFiles,
                cLevel,
                comprParams,
                dictBuffer,
                dictBufferSize,
                cctx,
                dctx,
                displayLevel,
                displayName,
                adv);
    }

    /* clean up */
    BMK_freeTimedFnState(timeStateCompress);
    BMK_freeTimedFnState(timeStateDecompress);

    ZSTD_freeCCtx(cctx);
    ZSTD_freeDCtx(dctx);

    free(internalDstBuffer);
    free(resultBuffer);

    free((void*)srcPtrs);
    free(srcSizes);
    free(cPtrs);
    free(cSizes);
    free(cCapacities);
    free(resPtrs);
    free(resSizes);

    if (allocationincomplete) {
        RETURN_ERROR(
                31, BMK_benchOutcome_t, "allocation error : not enough memory");
    }

    if (dstParamsError) {
        RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
    }
    return outcome;
}

BMK_benchOutcome_t BMK_benchMem(
        const void* srcBuffer,
        size_t srcSize,
        const size_t* fileSizes,
        unsigned nbFiles,
        int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const void* dictBuffer,
        size_t dictBufferSize,
        int displayLevel,
        const char* displayName)
{
    BMK_advancedParams_t const adv = BMK_initAdvancedParams();
    return BMK_benchMemAdvanced(
            srcBuffer,
            srcSize,
            NULL,
            0,
            fileSizes,
            nbFiles,
            cLevel,
            comprParams,
            dictBuffer,
            dictBufferSize,
            displayLevel,
            displayName,
            &adv);
}

static BMK_benchOutcome_t BMK_benchCLevel(
        const void* srcBuffer,
        size_t benchedSize,
        const size_t* fileSizes,
        unsigned nbFiles,
        int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const void* dictBuffer,
        size_t dictBufferSize,
        int displayLevel,
        const char* displayName,
        BMK_advancedParams_t const* const adv)
{
    const char* pch = strrchr(displayName, '\\'); /* Windows */
    if (!pch)
        pch = strrchr(displayName, '/'); /* Linux */
    if (pch)
        displayName = pch + 1;

    if (adv->realTime) {
        DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
        SET_REALTIME_PRIORITY;
    }

    if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
        OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
               ZSTD_VERSION_STRING,
               ZSTD_GIT_COMMIT_STRING,
               (unsigned)benchedSize,
               adv->nbSeconds,
               (unsigned)(adv->blockSize >> 10));

    return BMK_benchMemAdvanced(
            srcBuffer,
            benchedSize,
            NULL,
            0,
            fileSizes,
            nbFiles,
            cLevel,
            comprParams,
            dictBuffer,
            dictBufferSize,
            displayLevel,
            displayName,
            adv);
}

int BMK_syntheticTest(
        int cLevel,
        double compressibility,
        const ZSTD_compressionParameters* compressionParams,
        int displayLevel,
        const BMK_advancedParams_t* adv)
{
    char nameBuff[20]        = { 0 };
    const char* name         = nameBuff;
    size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
    void* srcBuffer;
    BMK_benchOutcome_t res;

    if (cLevel > ZSTD_maxCLevel()) {
        DISPLAYLEVEL(1, "Invalid Compression Level");
        return 15;
    }

    /* Memory allocation */
    srcBuffer = malloc(benchedSize);
    if (!srcBuffer) {
        DISPLAYLEVEL(1, "allocation error : not enough memory");
        return 16;
    }

    /* Fill input buffer */
    if (compressibility < 0.0) {
        LOREM_genBuffer(srcBuffer, benchedSize, 0);
        name = "Lorem ipsum";
    } else {
        RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
        formatString_u(
                nameBuff,
                sizeof(nameBuff),
                "Synthetic %u%%",
                (unsigned)(compressibility * 100));
    }

    /* Bench */
    res = BMK_benchCLevel(
            srcBuffer,
            benchedSize,
            &benchedSize /* ? */,
            1 /* ? */,
            cLevel,
            compressionParams,
            NULL,
            0, /* dictionary */
            displayLevel,
            name,
            adv);

    /* clean up */
    free(srcBuffer);

    return !BMK_isSuccessful_benchOutcome(res);
}

static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    BYTE* testmem     = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += step;
    if (requiredMem > maxMemory)
        requiredMem = maxMemory;

    do {
        testmem = (BYTE*)malloc((size_t)requiredMem);
        requiredMem -= step;
    } while (!testmem && requiredMem > 0);

    free(testmem);
    return (size_t)(requiredMem);
}

/*! BMK_loadFiles() :
 *  Loads `buffer` with content of files listed within `fileNamesTable`.
 *  At most, fills `buffer` entirely. */
static int BMK_loadFiles(
        void* buffer,
        size_t bufferSize,
        size_t* fileSizes,
        const char* const* fileNamesTable,
        unsigned nbFiles,
        int displayLevel)
{
    size_t pos = 0, totalSize = 0;
    unsigned n;
    for (n = 0; n < nbFiles; n++) {
        U64 fileSize = UTIL_getFileSize(
                fileNamesTable[n]); /* last file may be shortened */
        if (UTIL_isDirectory(fileNamesTable[n])) {
            DISPLAYLEVEL(
                    2, "Ignoring %s directory...       \n", fileNamesTable[n]);
            fileSizes[n] = 0;
            continue;
        }
        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
            DISPLAYLEVEL(
                    2,
                    "Cannot evaluate size of %s, ignoring ... \n",
                    fileNamesTable[n]);
            fileSizes[n] = 0;
            continue;
        }
        {
            FILE* const f = fopen(fileNamesTable[n], "rb");
            if (f == NULL)
                RETURN_ERROR_INT(
                        10, "impossible to open file %s", fileNamesTable[n]);
            OUTPUTLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
            if (fileSize > bufferSize - pos)
                fileSize = bufferSize - pos,
                nbFiles  = n; /* buffer too small - stop after this file */
            {
                size_t const readSize =
                        fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
                if (readSize != (size_t)fileSize)
                    RETURN_ERROR_INT(
                            11, "could not read %s", fileNamesTable[n]);
                pos += readSize;
            }
            fileSizes[n] = (size_t)fileSize;
            totalSize += (size_t)fileSize;
            fclose(f);
        }
    }

    if (totalSize == 0)
        RETURN_ERROR_INT(12, "no data to bench");
    return 0;
}

int BMK_benchFilesAdvanced(
        const char* const* fileNamesTable,
        unsigned nbFiles,
        const char* dictFileName,
        int cLevel,
        const ZSTD_compressionParameters* compressionParams,
        int displayLevel,
        const BMK_advancedParams_t* adv)
{
    void* srcBuffer = NULL;
    size_t benchedSize;
    void* dictBuffer      = NULL;
    size_t dictBufferSize = 0;
    size_t* fileSizes     = NULL;
    BMK_benchOutcome_t res;
    U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);

    if (!nbFiles) {
        DISPLAYLEVEL(1, "No Files to Benchmark");
        return 13;
    }

    if (cLevel > ZSTD_maxCLevel()) {
        DISPLAYLEVEL(1, "Invalid Compression Level");
        return 14;
    }

    if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
        DISPLAYLEVEL(1, "Error loading files");
        return 15;
    }

    fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
    if (!fileSizes) {
        DISPLAYLEVEL(1, "not enough memory for fileSizes");
        return 16;
    }

    /* Load dictionary */
    if (dictFileName != NULL) {
        U64 const dictFileSize = UTIL_getFileSize(dictFileName);
        if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
            DISPLAYLEVEL(
                    1,
                    "error loading %s : %s \n",
                    dictFileName,
                    strerror(errno));
            free(fileSizes);
            DISPLAYLEVEL(1, "benchmark aborted");
            return 17;
        }
        if (dictFileSize > 64 MB) {
            free(fileSizes);
            DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
            return 18;
        }
        dictBufferSize = (size_t)dictFileSize;
        dictBuffer     = malloc(dictBufferSize);
        if (dictBuffer == NULL) {
            free(fileSizes);
            DISPLAYLEVEL(
                    1,
                    "not enough memory for dictionary (%u bytes)",
                    (unsigned)dictBufferSize);
            return 19;
        }

        {
            int const errorCode = BMK_loadFiles(
                    dictBuffer,
                    dictBufferSize,
                    fileSizes,
                    &dictFileName /*?*/,
                    1 /*?*/,
                    displayLevel);
            if (errorCode) {
                res = BMK_benchOutcome_error();
                goto _cleanUp;
            }
        }
    }

    /* Memory allocation & restrictions */
    benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
    if ((U64)benchedSize > totalSizeToLoad)
        benchedSize = (size_t)totalSizeToLoad;
    if (benchedSize < totalSizeToLoad)
        DISPLAY("Not enough memory; testing %u MB only...\n",
                (unsigned)(benchedSize >> 20));

    srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
    if (!srcBuffer) {
        free(dictBuffer);
        free(fileSizes);
        DISPLAYLEVEL(1, "not enough memory for srcBuffer");
        return 20;
    }

    /* Load input buffer */
    {
        int const errorCode = BMK_loadFiles(
                srcBuffer,
                benchedSize,
                fileSizes,
                fileNamesTable,
                nbFiles,
                displayLevel);
        if (errorCode) {
            res = BMK_benchOutcome_error();
            goto _cleanUp;
        }
    }

    /* Bench */
    {
        char mfName[20] = { 0 };
        formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
        {
            const char* const displayName =
                    (nbFiles > 1) ? mfName : fileNamesTable[0];
            res = BMK_benchCLevel(
                    srcBuffer,
                    benchedSize,
                    fileSizes,
                    nbFiles,
                    cLevel,
                    compressionParams,
                    dictBuffer,
                    dictBufferSize,
                    displayLevel,
                    displayName,
                    adv);
        }
    }

_cleanUp:
    free(srcBuffer);
    free(dictBuffer);
    free(fileSizes);
    return !BMK_isSuccessful_benchOutcome(res);
}

int BMK_benchFiles(
        const char* const* fileNamesTable,
        unsigned nbFiles,
        const char* dictFileName,
        int cLevel,
        const ZSTD_compressionParameters* compressionParams,
        int displayLevel)
{
    BMK_advancedParams_t const adv = BMK_initAdvancedParams();
    return BMK_benchFilesAdvanced(
            fileNamesTable,
            nbFiles,
            dictFileName,
            cLevel,
            compressionParams,
            displayLevel,
            &adv);
}