/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ /* largeNbDicts * This is a benchmark test tool * dedicated to the specific case of dictionary decompression * using a very large nb of dictionaries * thus suffering latency from lots of cache misses. * It's created in a bid to investigate performance and find optimizations. */ /*--- Dependencies ---*/ #include <stddef.h> /* size_t */ #include <stdlib.h> /* malloc, free, abort, qsort*/ #include <stdio.h> /* fprintf */ #include <limits.h> /* UINT_MAX */ #include <assert.h> /* assert */ #include "util.h" #include "benchfn.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zdict.h" /*--- Constants --- */ #define KB *(1<<10) #define MB *(1<<20) #define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */ #define DICTSIZE (4 KB) #define CLEVEL_DEFAULT 3 #define DICT_LOAD_METHOD ZSTD_dlm_byCopy #define BENCH_TIME_DEFAULT_S 6 #define RUN_TIME_DEFAULT_MS 1000 #define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS) #define DISPLAY_LEVEL_DEFAULT 3 #define BENCH_SIZE_MAX (1200 MB) /*--- Macros ---*/ #define CONTROL(c) { if (!(c)) abort(); } #undef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) /*--- Display Macros ---*/ #define DISPLAY(...) fprintf(stdout, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ /*--- buffer_t ---*/ typedef struct { void* ptr; size_t size; size_t capacity; } buffer_t; static const buffer_t kBuffNull = { NULL, 0, 0 }; /* @return : kBuffNull if any error */ static buffer_t createBuffer(size_t capacity) { assert(capacity > 0); void* const ptr = malloc(capacity); if (ptr==NULL) return kBuffNull; buffer_t buffer; buffer.ptr = ptr; buffer.capacity = capacity; buffer.size = 0; return buffer; } static void freeBuffer(buffer_t buff) { free(buff.ptr); } static void fillBuffer_fromHandle(buffer_t* buff, FILE* f) { size_t const readSize = fread(buff->ptr, 1, buff->capacity, f); buff->size = readSize; } /* @return : kBuffNull if any error */ static buffer_t createBuffer_fromFile(const char* fileName) { U64 const fileSize = UTIL_getFileSize(fileName); size_t const bufferSize = (size_t) fileSize; if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull; assert((U64)bufferSize == fileSize); /* check overflow */ { FILE* const f = fopen(fileName, "rb"); if (f == NULL) return kBuffNull; buffer_t buff = createBuffer(bufferSize); CONTROL(buff.ptr != NULL); fillBuffer_fromHandle(&buff, f); CONTROL(buff.size == buff.capacity); fclose(f); /* do nothing specific if fclose() fails */ return buff; } } /* @return : kBuffNull if any error */ static buffer_t createDictionaryBuffer(const char* dictionaryName, const void* srcBuffer, const size_t* srcBlockSizes, size_t nbBlocks, size_t requestedDictSize) { if (dictionaryName) { DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName); return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */ } else { DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", (unsigned)requestedDictSize); void* const dictBuffer = malloc(requestedDictSize); CONTROL(dictBuffer != NULL); assert(nbBlocks <= UINT_MAX); size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize, srcBuffer, srcBlockSizes, (unsigned)nbBlocks); CONTROL(!ZSTD_isError(dictSize)); buffer_t result; result.ptr = dictBuffer; result.capacity = requestedDictSize; result.size = dictSize; return result; } } /*! BMK_loadFiles() : * Loads `buffer`, with content from files listed within `fileNamesTable`. * Fills `buffer` entirely. * @return : 0 on success, !=0 on error */ static int loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, const char* const * fileNamesTable, unsigned nbFiles) { size_t pos = 0, totalSize = 0; for (unsigned n=0; n<nbFiles; n++) { U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); if (UTIL_isDirectory(fileNamesTable[n])) { fileSizes[n] = 0; continue; } if (fileSize == UTIL_FILESIZE_UNKNOWN) { fileSizes[n] = 0; continue; } FILE* const f = fopen(fileNamesTable[n], "rb"); assert(f!=NULL); assert(pos <= bufferSize); assert(fileSize <= bufferSize - pos); { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); assert(readSize == fileSize); pos += readSize; } fileSizes[n] = (size_t)fileSize; totalSize += (size_t)fileSize; fclose(f); } assert(totalSize == bufferSize); return 0; } /*--- slice_collection_t ---*/ typedef struct { void** slicePtrs; size_t* capacities; size_t nbSlices; } slice_collection_t; static const slice_collection_t kNullCollection = { NULL, NULL, 0 }; static void freeSliceCollection(slice_collection_t collection) { free(collection.slicePtrs); free(collection.capacities); } /* shrinkSizes() : * downsizes sizes of slices within collection, according to `newSizes`. * every `newSizes` entry must be <= than its corresponding collection size */ void shrinkSizes(slice_collection_t collection, const size_t* newSizes) /* presumed same size as collection */ { size_t const nbSlices = collection.nbSlices; for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) { assert(newSizes[blockNb] <= collection.capacities[blockNb]); collection.capacities[blockNb] = newSizes[blockNb]; } } /* splitSlices() : * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize. * otherwise, creates exactly nbSlices slices, * by either truncating input (when smaller) * or repeating input from beginning */ static slice_collection_t splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices) { if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */ size_t nbSrcBlocks = 0; for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) { size_t pos = 0; while (pos <= srcSlices.capacities[ssnb]) { nbSrcBlocks++; pos += blockSize; } } if (nbSlices == 0) nbSlices = nbSrcBlocks; void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable)); size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities)); if (sliceTable == NULL || capacities == NULL) { free(sliceTable); free(capacities); return kNullCollection; } size_t ssnb = 0; for (size_t sliceNb=0; sliceNb < nbSlices; ) { ssnb = (ssnb + 1) % srcSlices.nbSlices; size_t pos = 0; char* const ptr = (char*)srcSlices.slicePtrs[ssnb]; while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) { size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos); sliceTable[sliceNb] = ptr + pos; capacities[sliceNb] = size; sliceNb++; pos += blockSize; } } slice_collection_t result; result.nbSlices = nbSlices; result.slicePtrs = sliceTable; result.capacities = capacities; return result; } static size_t sliceCollection_totalCapacity(slice_collection_t sc) { size_t totalSize = 0; for (size_t n=0; n<sc.nbSlices; n++) totalSize += sc.capacities[n]; return totalSize; } /* --- buffer collection --- */ typedef struct { buffer_t buffer; slice_collection_t slices; } buffer_collection_t; static void freeBufferCollection(buffer_collection_t bc) { freeBuffer(bc.buffer); freeSliceCollection(bc.slices); } static buffer_collection_t createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc) { size_t const bufferSize = sliceCollection_totalCapacity(sc); buffer_t buffer = createBuffer(bufferSize); CONTROL(buffer.ptr != NULL); size_t const nbSlices = sc.nbSlices; void** const slices = (void**)malloc(nbSlices * sizeof(*slices)); CONTROL(slices != NULL); size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities)); CONTROL(capacities != NULL); char* const ptr = (char*)buffer.ptr; size_t pos = 0; for (size_t n=0; n < nbSlices; n++) { capacities[n] = sc.capacities[n]; slices[n] = ptr + pos; pos += capacities[n]; } buffer_collection_t result; result.buffer = buffer; result.slices.nbSlices = nbSlices; result.slices.capacities = capacities; result.slices.slicePtrs = slices; return result; } static buffer_collection_t createBufferCollection_fromSliceCollection(slice_collection_t sc) { size_t const bufferSize = sliceCollection_totalCapacity(sc); buffer_t buffer = createBuffer(bufferSize); CONTROL(buffer.ptr != NULL); size_t const nbSlices = sc.nbSlices; void** const slices = (void**)malloc(nbSlices * sizeof(*slices)); CONTROL(slices != NULL); size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities)); CONTROL(capacities != NULL); char* const ptr = (char*)buffer.ptr; size_t pos = 0; for (size_t n=0; n < nbSlices; n++) { capacities[n] = sc.capacities[n]; slices[n] = ptr + pos; pos += capacities[n]; } for (size_t i = 0; i < nbSlices; i++) { memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]); capacities[i] = sc.capacities[i]; } buffer_collection_t result; result.buffer = buffer; result.slices.nbSlices = nbSlices; result.slices.capacities = capacities; result.slices.slicePtrs = slices; return result; } /* @return : kBuffNull if any error */ static buffer_collection_t createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles) { U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN); assert(totalSizeToLoad <= BENCH_SIZE_MAX); size_t const loadedSize = (size_t)totalSizeToLoad; assert(loadedSize > 0); void* const srcBuffer = malloc(loadedSize); assert(srcBuffer != NULL); assert(nbFiles > 0); size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes)); assert(fileSizes != NULL); /* Load input buffer */ int const errorCode = loadFiles(srcBuffer, loadedSize, fileSizes, fileNamesTable, nbFiles); assert(errorCode == 0); void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable)); assert(sliceTable != NULL); char* const ptr = (char*)srcBuffer; size_t pos = 0; unsigned fileNb = 0; for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) { sliceTable[fileNb] = ptr + pos; pos += fileSizes[fileNb]; } assert(pos == loadedSize); assert(fileNb == nbFiles); buffer_t buffer; buffer.ptr = srcBuffer; buffer.capacity = loadedSize; buffer.size = loadedSize; slice_collection_t slices; slices.slicePtrs = sliceTable; slices.capacities = fileSizes; slices.nbSlices = nbFiles; buffer_collection_t bc; bc.buffer = buffer; bc.slices = slices; return bc; } /*--- ddict_collection_t ---*/ typedef struct { ZSTD_DDict** ddicts; size_t nbDDict; } ddict_collection_t; typedef struct { ZSTD_CDict** cdicts; size_t nbCDict; } cdict_collection_t; static const cdict_collection_t kNullCDictCollection = { NULL, 0 }; static void freeCDictCollection(cdict_collection_t cdictc) { for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) { ZSTD_freeCDict(cdictc.cdicts[dictNb]); } free(cdictc.cdicts); } /* returns .buffers=NULL if operation fails */ static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams) { ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*)); if (cdicts==NULL) return kNullCDictCollection; for (size_t dictNb=0; dictNb < nbCDict; dictNb++) { cdicts[dictNb] = ZSTD_createCDict_advanced2(dictBuffer, dictSize, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem); CONTROL(cdicts[dictNb] != NULL); } cdict_collection_t cdictc; cdictc.cdicts = cdicts; cdictc.nbCDict = nbCDict; return cdictc; } static const ddict_collection_t kNullDDictCollection = { NULL, 0 }; static void freeDDictCollection(ddict_collection_t ddictc) { for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) { ZSTD_freeDDict(ddictc.ddicts[dictNb]); } free(ddictc.ddicts); } /* returns .buffers=NULL if operation fails */ static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict) { ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*)); assert(ddicts != NULL); if (ddicts==NULL) return kNullDDictCollection; for (size_t dictNb=0; dictNb < nbDDict; dictNb++) { ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize); assert(ddicts[dictNb] != NULL); } ddict_collection_t ddictc; ddictc.ddicts = ddicts; ddictc.nbDDict = nbDDict; return ddictc; } /* mess with addresses, so that linear scanning dictionaries != linear address scanning */ void shuffleCDictionaries(cdict_collection_t dicts) { size_t const nbDicts = dicts.nbCDict; for (size_t r=0; r<nbDicts; r++) { size_t const d = (size_t)rand() % nbDicts; ZSTD_CDict* tmpd = dicts.cdicts[d]; dicts.cdicts[d] = dicts.cdicts[r]; dicts.cdicts[r] = tmpd; } for (size_t r=0; r<nbDicts; r++) { size_t const d1 = (size_t)rand() % nbDicts; size_t const d2 = (size_t)rand() % nbDicts; ZSTD_CDict* tmpd = dicts.cdicts[d1]; dicts.cdicts[d1] = dicts.cdicts[d2]; dicts.cdicts[d2] = tmpd; } } /* mess with addresses, so that linear scanning dictionaries != linear address scanning */ void shuffleDDictionaries(ddict_collection_t dicts) { size_t const nbDicts = dicts.nbDDict; for (size_t r=0; r<nbDicts; r++) { size_t const d = (size_t)rand() % nbDicts; ZSTD_DDict* tmpd = dicts.ddicts[d]; dicts.ddicts[d] = dicts.ddicts[r]; dicts.ddicts[r] = tmpd; } for (size_t r=0; r<nbDicts; r++) { size_t const d1 = (size_t)rand() % nbDicts; size_t const d2 = (size_t)rand() % nbDicts; ZSTD_DDict* tmpd = dicts.ddicts[d1]; dicts.ddicts[d1] = dicts.ddicts[d2]; dicts.ddicts[d2] = tmpd; } } /* --- Compression --- */ /* compressBlocks() : * @return : total compressed size of all blocks, * or 0 if error. */ static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */ slice_collection_t dstBlockBuffers, slice_collection_t srcBlockBuffers, ZSTD_CDict* cdict, int cLevel) { size_t const nbBlocks = srcBlockBuffers.nbSlices; assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices); ZSTD_CCtx* const cctx = ZSTD_createCCtx(); assert(cctx != NULL); size_t totalCSize = 0; for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) { size_t cBlockSize; if (cdict == NULL) { cBlockSize = ZSTD_compressCCtx(cctx, dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb], srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb], cLevel); } else { cBlockSize = ZSTD_compress_usingCDict(cctx, dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb], srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb], cdict); } CONTROL(!ZSTD_isError(cBlockSize)); if (cSizes) cSizes[blockNb] = cBlockSize; totalCSize += cBlockSize; } return totalCSize; } /* --- Benchmark --- */ typedef struct { ZSTD_CCtx* cctx; size_t nbDicts; size_t dictNb; cdict_collection_t dictionaries; } compressInstructions; compressInstructions createCompressInstructions(cdict_collection_t dictionaries, ZSTD_CCtx_params* cctxParams) { compressInstructions ci; ci.cctx = ZSTD_createCCtx(); CONTROL(ci.cctx != NULL); if (cctxParams) ZSTD_CCtx_setParametersUsingCCtxParams(ci.cctx, cctxParams); ci.nbDicts = dictionaries.nbCDict; ci.dictNb = 0; ci.dictionaries = dictionaries; return ci; } void freeCompressInstructions(compressInstructions ci) { ZSTD_freeCCtx(ci.cctx); } typedef struct { ZSTD_DCtx* dctx; size_t nbDicts; size_t dictNb; ddict_collection_t dictionaries; } decompressInstructions; decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries) { decompressInstructions di; di.dctx = ZSTD_createDCtx(); assert(di.dctx != NULL); di.nbDicts = dictionaries.nbDDict; di.dictNb = 0; di.dictionaries = dictionaries; return di; } void freeDecompressInstructions(decompressInstructions di) { ZSTD_freeDCtx(di.dctx); } /* benched function */ size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload) { compressInstructions* const ci = (compressInstructions*) payload; (void)dstCapacity; ZSTD_CCtx_refCDict(ci->cctx, ci->dictionaries.cdicts[ci->dictNb]); ZSTD_compress2(ci->cctx, dst, srcSize, src, srcSize); ci->dictNb = ci->dictNb + 1; if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0; return srcSize; } /* benched function */ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload) { decompressInstructions* const di = (decompressInstructions*) payload; size_t const result = ZSTD_decompress_usingDDict(di->dctx, dst, dstCapacity, src, srcSize, di->dictionaries.ddicts[di->dictNb]); di->dictNb = di->dictNb + 1; if (di->dictNb >= di->nbDicts) di->dictNb = 0; return result; } typedef enum { fastest = 0, median = 1, } metricAggregatePref_e; /* compareFunction() : * Sort input in decreasing order when used with qsort() */ int compareFunction(const void *a, const void *b) { double x = *(const double *)a; double y = *(const double *)b; if (x < y) return 1; else if (x > y) return -1; return 0; } double aggregateData(double *data, size_t size, metricAggregatePref_e metricAggregatePref) { qsort(data, size, sizeof(*data), compareFunction); if (metricAggregatePref == fastest) return data[0]; else /* median */ return (data[(size - 1) / 2] + data[size / 2]) / 2; } static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks, ddict_collection_t ddictionaries, cdict_collection_t cdictionaries, unsigned nbRounds, int benchCompression, const char *exeName, ZSTD_CCtx_params *cctxParams, metricAggregatePref_e metricAggregatePref) { assert(dstBlocks.nbSlices == srcBlocks.nbSlices); if (benchCompression) assert(cctxParams); unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; unsigned const total_time_ms = nbRounds * ms_per_round; double *const speedPerRound = (double *)malloc(nbRounds * sizeof(double)); BMK_timedFnState_t* const benchState = BMK_createTimedFnState(total_time_ms, ms_per_round); decompressInstructions di = createDecompressInstructions(ddictionaries); compressInstructions ci = createCompressInstructions(cdictionaries, cctxParams); void* payload = benchCompression ? (void*)&ci : (void*)&di; BMK_benchParams_t const bp = { .benchFn = benchCompression ? compress : decompress, .benchPayload = payload, .initFn = NULL, .initPayload = NULL, .errorFn = ZSTD_isError, .blockCount = dstBlocks.nbSlices, .srcBuffers = (const void* const*) srcBlocks.slicePtrs, .srcSizes = srcBlocks.capacities, .dstBuffers = dstBlocks.slicePtrs, .dstCapacities = dstBlocks.capacities, .blockResults = NULL }; size_t roundNb = 0; for (;;) { BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp); CONTROL(BMK_isSuccessful_runOutcome(outcome)); BMK_runTime_t const result = BMK_extract_runTime(outcome); double const dTime_ns = result.nanoSecPerRun; double const dTime_sec = (double)dTime_ns / 1000000000; size_t const srcSize = result.sumOfReturn; double const speed_MBps = (double)srcSize / dTime_sec / (1 MB); speedPerRound[roundNb] = speed_MBps; if (benchCompression) DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps); else DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps); fflush(stdout); if (BMK_isCompleted_TimedFn(benchState)) break; roundNb++; } DISPLAY("\n"); /* BMK_benchTimedFn may not run exactly nbRounds iterations */ double speedAggregated = aggregateData(speedPerRound, roundNb + 1, metricAggregatePref); if (metricAggregatePref == fastest) DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated); else DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated); char* csvFileName = malloc(strlen(exeName) + 5); strcpy(csvFileName, exeName); strcat(csvFileName, ".csv"); FILE* csvFile = fopen(csvFileName, "r"); if (!csvFile) { csvFile = fopen(csvFileName, "wt"); assert(csvFile); fprintf(csvFile, "%s\n", exeName); /* Print table headers */ fprintf( csvFile, "Compression/Decompression,Level,nbDicts,dictAttachPref,metricAggregatePref,Speed\n"); } else { fclose(csvFile); csvFile = fopen(csvFileName, "at"); assert(csvFile); } int cLevel = -1; int dictAttachPref = -1; if (benchCompression) { ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_compressionLevel, &cLevel); ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_forceAttachDict, &dictAttachPref); } fprintf(csvFile, "%s,%d,%ld,%d,%d,%.1f\n", benchCompression ? "Compression" : "Decompression", cLevel, benchCompression ? ci.nbDicts : di.nbDicts, dictAttachPref, metricAggregatePref, speedAggregated); fclose(csvFile); free(csvFileName); freeDecompressInstructions(di); freeCompressInstructions(ci); BMK_freeTimedFnState(benchState); return 0; /* success */ } /*! bench() : * fileName : file to load for benchmarking purpose * dictionary : optional (can be NULL), file to load as dictionary, * if none provided : will be calculated on the fly by the program. * @return : 0 is success, 1+ otherwise */ int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary, size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, unsigned nbRounds, int benchCompression, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params *cctxParams, const char *exeName, metricAggregatePref_e metricAggregatePref) { int result = 0; DISPLAYLEVEL(3, "loading %u files... \n", nbFiles); buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles); CONTROL(srcs.buffer.ptr != NULL); buffer_t srcBuffer = srcs.buffer; size_t const srcSize = srcBuffer.size; DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n", (double)srcSize / (1 MB)); slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks); nbBlocks = (unsigned)(srcSlices.nbSlices); DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks); if (blockSize) DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize); DISPLAYLEVEL(3, "\n"); size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices); size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities)); CONTROL(dstCapacities != NULL); size_t dstBufferCapacity = 0; for (size_t bnb=0; bnb<nbBlocks; bnb++) { dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]); dstBufferCapacity += dstCapacities[bnb]; } buffer_t dstBuffer = createBuffer(dstBufferCapacity); CONTROL(dstBuffer.ptr != NULL); void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable)); CONTROL(sliceTable != NULL); { char* const ptr = dstBuffer.ptr; size_t pos = 0; for (size_t snb=0; snb < nbBlocks; snb++) { sliceTable[snb] = ptr + pos; pos += dstCapacities[snb]; } } slice_collection_t dstSlices; dstSlices.capacities = dstCapacities; dstSlices.slicePtrs = sliceTable; dstSlices.nbSlices = nbBlocks; /* dictionary determination */ buffer_t const dictBuffer = createDictionaryBuffer(dictionary, srcs.buffer.ptr, srcSlices.capacities, srcSlices.nbSlices, DICTSIZE); CONTROL(dictBuffer.ptr != NULL); ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuffer.ptr, dictBuffer.size, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem); CONTROL(cdict != NULL); size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel); CONTROL(cTotalSizeNoDict != 0); DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n", clevel, (double)totalSrcSlicesSize / (double)cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); size_t* const cSizes = malloc(nbBlocks * sizeof(size_t)); CONTROL(cSizes != NULL); size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel); CONTROL(cTotalSize != 0); DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n", (unsigned)dictBuffer.size, (double)totalSrcSlicesSize / (double)cTotalSize, (unsigned)cTotalSize); /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */ shrinkSizes(dstSlices, cSizes); unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks; cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, dictContentType, cctxParams); CONTROL(cdictionaries.cdicts != NULL); ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts); CONTROL(ddictionaries.ddicts != NULL); if (benchCompression) { size_t const dictMem = ZSTD_sizeof_CDict(cdictionaries.cdicts[0]); size_t const allDictMem = dictMem * nbDicts; DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", nbDicts, (double)allDictMem / (1 MB)); shuffleCDictionaries(cdictionaries); buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, cctxParams, metricAggregatePref); freeBufferCollection(resultCollection); } else { size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, DICT_LOAD_METHOD); size_t const allDictMem = dictMem * nbDicts; DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", nbDicts, (double)allDictMem / (1 MB)); shuffleDDictionaries(ddictionaries); buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, NULL, metricAggregatePref); freeBufferCollection(resultCollection); } /* free all heap objects in reverse order */ freeCDictCollection(cdictionaries); freeDDictCollection(ddictionaries); free(cSizes); ZSTD_freeCDict(cdict); freeBuffer(dictBuffer); freeSliceCollection(dstSlices); freeBuffer(dstBuffer); freeSliceCollection(srcSlices); freeBufferCollection(srcs); return result; } /* --- Command Line --- */ /*! readU32FromChar() : * @return : unsigned integer value read from input in `char` format. * allows and interprets K, KB, KiB, M, MB and MiB suffix. * Will also modify `*stringPtr`, advancing it to position where it stopped reading. * Note : function will exit() program if digit sequence overflows */ static unsigned readU32FromChar(const char** stringPtr) { unsigned result = 0; while ((**stringPtr >='0') && (**stringPtr <='9')) { unsigned const max = (((unsigned)(-1)) / 10) - 1; assert(result <= max); /* check overflow */ result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ; } if ((**stringPtr=='K') || (**stringPtr=='M')) { unsigned const maxK = ((unsigned)(-1)) >> 10; assert(result <= maxK); /* check overflow */ result <<= 10; if (**stringPtr=='M') { assert(result <= maxK); /* check overflow */ result <<= 10; } (*stringPtr)++; /* skip `K` or `M` */ if (**stringPtr=='i') (*stringPtr)++; if (**stringPtr=='B') (*stringPtr)++; } return result; } /** longCommandWArg() : * check if *stringPtr is the same as longCommand. * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. * @return 0 and doesn't modify *stringPtr otherwise. */ static int longCommandWArg(const char** stringPtr, const char* longCommand) { size_t const comSize = strlen(longCommand); int const result = !strncmp(*stringPtr, longCommand, comSize); if (result) *stringPtr += comSize; return result; } int usage(const char* exeName) { DISPLAY (" \n"); DISPLAY (" %s [Options] filename(s) \n", exeName); DISPLAY (" \n"); DISPLAY ("Options : \n"); DISPLAY ("-z : benchmark compression (default) \n"); DISPLAY ("-d : benchmark decompression \n"); DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n"); DISPLAY ("-B# : split input into blocks of size # (default: no split) \n"); DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT); DISPLAY ("-D # : use # as a dictionary (default: create one) \n"); DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S); DISPLAY ("-p# : print speed for all rounds 0=fastest 1=median (default: 0) \n"); DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); DISPLAY ("-h : help (this text) \n"); DISPLAY (" \n"); DISPLAY ("Advanced Options (see zstd.h for documentation) : \n"); DISPLAY ("--dedicated-dict-search\n"); DISPLAY ("--dict-content-type=#\n"); DISPLAY ("--dict-attach-pref=#\n"); return 0; } int bad_usage(const char* exeName) { DISPLAY (" bad usage : \n"); usage(exeName); return 1; } int main (int argc, const char** argv) { int recursiveMode = 0; int benchCompression = 1; int dedicatedDictSearch = 0; unsigned nbRounds = BENCH_TIME_DEFAULT_S; const char* const exeName = argv[0]; if (argc < 2) return bad_usage(exeName); const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*)); assert(nameTable != NULL); unsigned nameIdx = 0; const char* dictionary = NULL; int cLevel = CLEVEL_DEFAULT; size_t blockSize = BLOCKSIZE_DEFAULT; unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */ unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto; metricAggregatePref_e metricAggregatePref = fastest; for (int argNb = 1; argNb < argc ; argNb++) { const char* argument = argv[argNb]; if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); } if (!strcmp(argument, "-d")) { benchCompression = 0; continue; } if (!strcmp(argument, "-z")) { benchCompression = 1; continue; } if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; } if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; } if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-p")) { metricAggregatePref = (int)readU32FromChar(&argument); continue;} if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; } if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } /* anything that's not a command is a filename */ nameTable[nameIdx++] = argument; } FileNamesTable* filenameTable; if (recursiveMode) { #ifndef UTIL_HAS_CREATEFILELIST assert(0); /* missing capability, do not run */ #endif filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */); } else { filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL); nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */ } ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); ZSTD_CCtxParams_init(cctxParams, cLevel); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables); int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName, metricAggregatePref); UTIL_freeFileNamesTable(filenameTable); free(nameTable); ZSTD_freeCCtxParams(cctxParams); return result; }