/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ /* largeNbDicts * This is a benchmark test tool * dedicated to the specific case of dictionary decompression * using a very large nb of dictionaries * thus suffering latency from lots of cache misses. * It's created in a bid to investigate performance and find optimizations. */ /*--- Dependencies ---*/ #include /* size_t */ #include /* malloc, free, abort, qsort*/ #include /* fprintf */ #include /* UINT_MAX */ #include /* assert */ #include "util.h" #include "benchfn.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zdict.h" /*--- Constants --- */ #define KB *(1<<10) #define MB *(1<<20) #define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */ #define DICTSIZE (4 KB) #define CLEVEL_DEFAULT 3 #define DICT_LOAD_METHOD ZSTD_dlm_byCopy #define BENCH_TIME_DEFAULT_S 6 #define RUN_TIME_DEFAULT_MS 1000 #define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS) #define DISPLAY_LEVEL_DEFAULT 3 #define BENCH_SIZE_MAX (1200 MB) /*--- Macros ---*/ #define CONTROL(c) { if (!(c)) abort(); } #undef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) /*--- Display Macros ---*/ #define DISPLAY(...) fprintf(stdout, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) 
{ if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }

/* Verbosity threshold tested by DISPLAYLEVEL() : a message is printed only when g_displayLevel >= its level. */
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */


/*--- buffer_t ---*/

/* Describes one memory area :
 * `ptr` is its base address, `size` the nb of bytes currently filled,
 * `capacity` the nb of bytes allocated. */
typedef struct {
    void* ptr;
    size_t size;
    size_t capacity;
} buffer_t;

/* Value returned by the buffer constructors below to signal a failure. */
static const buffer_t kBuffNull = { NULL, 0, 0 };

/* createBuffer() :
 * Allocates `capacity` bytes with malloc(), and returns them as an empty buffer (size == 0).
 * `capacity` must be > 0 (checked by assert()).
 * @return : kBuffNull if any error */
static buffer_t createBuffer(size_t capacity)
{
    assert(capacity > 0);
    void* const ptr = malloc(capacity);
    if (ptr==NULL) return kBuffNull;

    buffer_t buffer;
    buffer.ptr = ptr;
    buffer.capacity = capacity;
    buffer.size = 0;
    return buffer;
}

/* freeBuffer() :
 * Releases the memory referenced by `buff.ptr`.
 * `buff` is received by value, so the caller's copy still holds the stale pointer afterwards. */
static void freeBuffer(buffer_t buff)
{
    free(buff.ptr);
}


/* fillBuffer_fromHandle() :
 * Reads up to `buff->capacity` bytes from `f` into `buff->ptr`,
 * and stores the nb of bytes actually read into `buff->size`.
 * A short read is not reported here : the caller must compare size and capacity. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
    buff->size = readSize;
}

/* createBuffer_fromFile() :
 * Loads the whole content of `fileName` into a newly allocated buffer.
 * @return : kBuffNull if the file size is unknown or the file cannot be opened.
 *           Allocation failure or an incomplete read abort() the program (through CONTROL()). */
static buffer_t createBuffer_fromFile(const char* fileName)
{
    U64 const fileSize = UTIL_getFileSize(fileName);
    size_t const bufferSize = (size_t) fileSize;

    if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
    assert((U64)bufferSize == fileSize);   /* check overflow */

    {   FILE* const f = fopen(fileName, "rb");
        if (f == NULL) return kBuffNull;

        buffer_t buff = createBuffer(bufferSize);
        CONTROL(buff.ptr != NULL);

        fillBuffer_fromHandle(&buff, f);
        CONTROL(buff.size == buff.capacity);   /* the file must have been read entirely */

        fclose(f);   /* do nothing specific if fclose() fails */
        return buff;
    }
}


/* createDictionaryBuffer() :
 * When `dictionaryName` is provided, the dictionary is loaded from that file.
 * Otherwise, a work area of `requestedDictSize` bytes is allocated,
 * and a dictionary is trained into it from the `nbBlocks` samples
 * stored back to back in `srcBuffer`, with sizes listed in `srcBlockSizes`.
 * @return : kBuffNull if any error */
static buffer_t
createDictionaryBuffer(const char* dictionaryName,
                       const void* srcBuffer,
                       const size_t* srcBlockSizes, size_t nbBlocks,
                       size_t requestedDictSize)
{
    if (dictionaryName) {
        DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
        return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */

    } else {

        DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
                        (unsigned)requestedDictSize);
        void* const dictBuffer = malloc(requestedDictSize);
        CONTROL(dictBuffer != NULL);

assert(nbBlocks <= UINT_MAX);   /* the sample count is narrowed to `unsigned` just below */
        size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
                                                      srcBuffer,
                                                      srcBlockSizes, (unsigned)nbBlocks);
        CONTROL(!ZSTD_isError(dictSize));

        /* capacity is the size of the allocation, size is what training actually produced */
        buffer_t result;
        result.ptr = dictBuffer;
        result.capacity = requestedDictSize;
        result.size = dictSize;
        return result;
    }
}

/*! loadFiles() :
 *  Loads `buffer`, with content from files listed within `fileNamesTable`.
 *  Fills `buffer` entirely.
 * @return : 0 on success, !=0 on error */
static int loadFiles(void* buffer, size_t bufferSize,
                     size_t* fileSizes,
                     const char* const * fileNamesTable, unsigned nbFiles)
{
    size_t pos = 0, totalSize = 0;

    /* NOTE(review): the text is truncated right after `n` in the loop header below.
     * The remainder of loadFiles() and the beginning of the function that follows
     * (the one returning a buffer_collection_t) are missing from this copy of the file;
     * restore them from version control before building. */
    for (unsigned n=0; n 0);
    void* const srcBuffer = malloc(loadedSize);
    assert(srcBuffer != NULL);

    assert(nbFiles > 0);
    size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
    assert(fileSizes != NULL);

    /* Load input buffer */
    int const errorCode = loadFiles(srcBuffer, loadedSize,
                                    fileSizes,
                                    fileNamesTable, nbFiles);
    assert(errorCode == 0);

    void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
    assert(sliceTable != NULL);

    /* one slice per file : slice n starts at the cumulated size of files 0..n-1 */
    char* const ptr = (char*)srcBuffer;
    size_t pos = 0;
    unsigned fileNb = 0;
    for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
        sliceTable[fileNb] = ptr + pos;
        pos += fileSizes[fileNb];
    }
    assert(pos == loadedSize);    /* slices cover the loaded content exactly */
    assert(fileNb == nbFiles);    /* and every file received a slice */


    buffer_t buffer;
    buffer.ptr = srcBuffer;
    buffer.capacity = loadedSize;
    buffer.size = loadedSize;

    /* the fileSizes array is handed over as the slices' capacities table */
    slice_collection_t slices;
    slices.slicePtrs = sliceTable;
    slices.capacities = fileSizes;
    slices.nbSlices = nbFiles;

    buffer_collection_t bc;
    bc.buffer = buffer;
    bc.slices = slices;
    return bc;
}




/*--- ddict_collection_t ---*/

/* Table of `nbDDict` decompression dictionaries. */
typedef struct {
    ZSTD_DDict** ddicts;
    size_t nbDDict;
} ddict_collection_t;

/* Table of `nbCDict` compression dictionaries. */
typedef struct {
    ZSTD_CDict** cdicts;
    size_t nbCDict;
} cdict_collection_t;

/* Value returned by createCDictCollection() to signal a failure. */
static const cdict_collection_t kNullCDictCollection = { NULL, 0 };

/* freeCDictCollection() :
 * Releases every dictionary of the collection, then the table itself. */
static void freeCDictCollection(cdict_collection_t cdictc)
{
    for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
ZSTD_freeCDict(cdictc.cdicts[dictNb]); } free(cdictc.cdicts); } /* returns .buffers=NULL if operation fails */ static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams) { ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*)); if (cdicts==NULL) return kNullCDictCollection; for (size_t dictNb=0; dictNb < nbCDict; dictNb++) { cdicts[dictNb] = ZSTD_createCDict_advanced2(dictBuffer, dictSize, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem); CONTROL(cdicts[dictNb] != NULL); } cdict_collection_t cdictc; cdictc.cdicts = cdicts; cdictc.nbCDict = nbCDict; return cdictc; } static const ddict_collection_t kNullDDictCollection = { NULL, 0 }; static void freeDDictCollection(ddict_collection_t ddictc) { for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) { ZSTD_freeDDict(ddictc.ddicts[dictNb]); } free(ddictc.ddicts); } /* returns .buffers=NULL if operation fails */ static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict) { ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*)); assert(ddicts != NULL); if (ddicts==NULL) return kNullDDictCollection; for (size_t dictNb=0; dictNb < nbDDict; dictNb++) { ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize); assert(ddicts[dictNb] != NULL); } ddict_collection_t ddictc; ddictc.ddicts = ddicts; ddictc.nbDDict = nbDDict; return ddictc; } /* mess with addresses, so that linear scanning dictionaries != linear address scanning */ void shuffleCDictionaries(cdict_collection_t dicts) { size_t const nbDicts = dicts.nbCDict; for (size_t r=0; rcctx, ci->dictionaries.cdicts[ci->dictNb]); ZSTD_compress2(ci->cctx, dst, srcSize, src, srcSize); ci->dictNb = ci->dictNb + 1; if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0; return srcSize; } /* benched function */ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* 
payload) { decompressInstructions* const di = (decompressInstructions*) payload; size_t const result = ZSTD_decompress_usingDDict(di->dctx, dst, dstCapacity, src, srcSize, di->dictionaries.ddicts[di->dictNb]); di->dictNb = di->dictNb + 1; if (di->dictNb >= di->nbDicts) di->dictNb = 0; return result; } typedef enum { fastest = 0, median = 1, } metricAggregatePref_e; /* compareFunction() : * Sort input in decreasing order when used with qsort() */ int compareFunction(const void *a, const void *b) { double x = *(const double *)a; double y = *(const double *)b; if (x < y) return 1; else if (x > y) return -1; return 0; } double aggregateData(double *data, size_t size, metricAggregatePref_e metricAggregatePref) { qsort(data, size, sizeof(*data), compareFunction); if (metricAggregatePref == fastest) return data[0]; else /* median */ return (data[(size - 1) / 2] + data[size / 2]) / 2; } static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks, ddict_collection_t ddictionaries, cdict_collection_t cdictionaries, unsigned nbRounds, int benchCompression, const char *exeName, ZSTD_CCtx_params *cctxParams, metricAggregatePref_e metricAggregatePref) { assert(dstBlocks.nbSlices == srcBlocks.nbSlices); if (benchCompression) assert(cctxParams); unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; unsigned const total_time_ms = nbRounds * ms_per_round; double *const speedPerRound = (double *)malloc(nbRounds * sizeof(double)); BMK_timedFnState_t* const benchState = BMK_createTimedFnState(total_time_ms, ms_per_round); decompressInstructions di = createDecompressInstructions(ddictionaries); compressInstructions ci = createCompressInstructions(cdictionaries, cctxParams); void* payload = benchCompression ? (void*)&ci : (void*)&di; BMK_benchParams_t const bp = { .benchFn = benchCompression ? 
compress : decompress, .benchPayload = payload, .initFn = NULL, .initPayload = NULL, .errorFn = ZSTD_isError, .blockCount = dstBlocks.nbSlices, .srcBuffers = (const void* const*) srcBlocks.slicePtrs, .srcSizes = srcBlocks.capacities, .dstBuffers = dstBlocks.slicePtrs, .dstCapacities = dstBlocks.capacities, .blockResults = NULL }; size_t roundNb = 0; for (;;) { BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp); CONTROL(BMK_isSuccessful_runOutcome(outcome)); BMK_runTime_t const result = BMK_extract_runTime(outcome); double const dTime_ns = result.nanoSecPerRun; double const dTime_sec = (double)dTime_ns / 1000000000; size_t const srcSize = result.sumOfReturn; double const speed_MBps = (double)srcSize / dTime_sec / (1 MB); speedPerRound[roundNb] = speed_MBps; if (benchCompression) DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps); else DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps); fflush(stdout); if (BMK_isCompleted_TimedFn(benchState)) break; roundNb++; } DISPLAY("\n"); /* BMK_benchTimedFn may not run exactly nbRounds iterations */ double speedAggregated = aggregateData(speedPerRound, roundNb + 1, metricAggregatePref); if (metricAggregatePref == fastest) DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated); else DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated); char* csvFileName = malloc(strlen(exeName) + 5); strcpy(csvFileName, exeName); strcat(csvFileName, ".csv"); FILE* csvFile = fopen(csvFileName, "r"); if (!csvFile) { csvFile = fopen(csvFileName, "wt"); assert(csvFile); fprintf(csvFile, "%s\n", exeName); /* Print table headers */ fprintf( csvFile, "Compression/Decompression,Level,nbDicts,dictAttachPref,metricAggregatePref,Speed\n"); } else { fclose(csvFile); csvFile = fopen(csvFileName, "at"); assert(csvFile); } int cLevel = -1; int dictAttachPref = -1; if (benchCompression) { ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_compressionLevel, &cLevel); ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_forceAttachDict, 
&dictAttachPref); } fprintf(csvFile, "%s,%d,%ld,%d,%d,%.1f\n", benchCompression ? "Compression" : "Decompression", cLevel, benchCompression ? ci.nbDicts : di.nbDicts, dictAttachPref, metricAggregatePref, speedAggregated); fclose(csvFile); free(csvFileName); freeDecompressInstructions(di); freeCompressInstructions(ci); BMK_freeTimedFnState(benchState); return 0; /* success */ } /*! bench() : * fileName : file to load for benchmarking purpose * dictionary : optional (can be NULL), file to load as dictionary, * if none provided : will be calculated on the fly by the program. * @return : 0 is success, 1+ otherwise */ int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary, size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, unsigned nbRounds, int benchCompression, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params *cctxParams, const char *exeName, metricAggregatePref_e metricAggregatePref) { int result = 0; DISPLAYLEVEL(3, "loading %u files... \n", nbFiles); buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles); CONTROL(srcs.buffer.ptr != NULL); buffer_t srcBuffer = srcs.buffer; size_t const srcSize = srcBuffer.size; DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n", (double)srcSize / (1 MB)); slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks); nbBlocks = (unsigned)(srcSlices.nbSlices); DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks); if (blockSize) DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize); DISPLAYLEVEL(3, "\n"); size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices); size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities)); CONTROL(dstCapacities != NULL); size_t dstBufferCapacity = 0; for (size_t bnb=0; bnb='0') && (**stringPtr <='9')) { unsigned const max = (((unsigned)(-1)) / 10) - 1; assert(result <= max); /* check overflow */ result *= 10, result += (unsigned)**stringPtr - '0', 
(*stringPtr)++ ; } if ((**stringPtr=='K') || (**stringPtr=='M')) { unsigned const maxK = ((unsigned)(-1)) >> 10; assert(result <= maxK); /* check overflow */ result <<= 10; if (**stringPtr=='M') { assert(result <= maxK); /* check overflow */ result <<= 10; } (*stringPtr)++; /* skip `K` or `M` */ if (**stringPtr=='i') (*stringPtr)++; if (**stringPtr=='B') (*stringPtr)++; } return result; } /** longCommandWArg() : * check if *stringPtr is the same as longCommand. * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. * @return 0 and doesn't modify *stringPtr otherwise. */ static int longCommandWArg(const char** stringPtr, const char* longCommand) { size_t const comSize = strlen(longCommand); int const result = !strncmp(*stringPtr, longCommand, comSize); if (result) *stringPtr += comSize; return result; } int usage(const char* exeName) { DISPLAY (" \n"); DISPLAY (" %s [Options] filename(s) \n", exeName); DISPLAY (" \n"); DISPLAY ("Options : \n"); DISPLAY ("-z : benchmark compression (default) \n"); DISPLAY ("-d : benchmark decompression \n"); DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n"); DISPLAY ("-B# : split input into blocks of size # (default: no split) \n"); DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT); DISPLAY ("-D # : use # as a dictionary (default: create one) \n"); DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S); DISPLAY ("-p# : print speed for all rounds 0=fastest 1=median (default: 0) \n"); DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); DISPLAY ("-h : help (this text) \n"); DISPLAY (" \n"); DISPLAY ("Advanced Options (see zstd.h for documentation) : \n"); DISPLAY ("--dedicated-dict-search\n"); DISPLAY ("--dict-content-type=#\n"); DISPLAY ("--dict-attach-pref=#\n"); return 0; } int bad_usage(const char* 
exeName) { DISPLAY (" bad usage : \n"); usage(exeName); return 1; } int main (int argc, const char** argv) { int recursiveMode = 0; int benchCompression = 1; int dedicatedDictSearch = 0; unsigned nbRounds = BENCH_TIME_DEFAULT_S; const char* const exeName = argv[0]; if (argc < 2) return bad_usage(exeName); const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*)); assert(nameTable != NULL); unsigned nameIdx = 0; const char* dictionary = NULL; int cLevel = CLEVEL_DEFAULT; size_t blockSize = BLOCKSIZE_DEFAULT; unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */ unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto; metricAggregatePref_e metricAggregatePref = fastest; for (int argNb = 1; argNb < argc ; argNb++) { const char* argument = argv[argNb]; if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); } if (!strcmp(argument, "-d")) { benchCompression = 0; continue; } if (!strcmp(argument, "-z")) { benchCompression = 1; continue; } if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; } if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; } if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-p")) { metricAggregatePref = (int)readU32FromChar(&argument); continue;} if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; } if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, 
"--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } /* anything that's not a command is a filename */ nameTable[nameIdx++] = argument; } FileNamesTable* filenameTable; if (recursiveMode) { #ifndef UTIL_HAS_CREATEFILELIST assert(0); /* missing capability, do not run */ #endif filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */); } else { filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL); nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */ } ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); ZSTD_CCtxParams_init(cctxParams, cLevel); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables); int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName, metricAggregatePref); UTIL_freeFileNamesTable(filenameTable); free(nameTable); ZSTD_freeCCtxParams(cctxParams); return result; }