Skip to content

Commit 218e9fe

Browse files
committed
added a test case for dictBuilder failure
cyclic data set makes the entropy stage fail now, onto a fix for facebook#304 ...
1 parent 0699577 commit 218e9fe

File tree

5 files changed

+120
-94
lines changed

5 files changed

+120
-94
lines changed

lib/compress/zstd_compress.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2869,7 +2869,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
28692869
if (params.nbThreads > 1) {
28702870
if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
28712871
DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
2872-
params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx));
2872+
params.nbThreads, (U32)ZSTDMT_getNbThreads(cctx->mtctx));
28732873
ZSTDMT_freeCCtx(cctx->mtctx);
28742874
cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
28752875
if (cctx->mtctx == NULL) return ERROR(memory_allocation);

lib/dictBuilder/cover.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
537537
/* Checks */
538538
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
539539
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
540-
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
541-
(COVER_MAX_SAMPLES_SIZE >> 20));
540+
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
541+
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
542542
return 0;
543543
}
544544
/* Zero the context */
@@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
651651
}
652652

653653
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
654-
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
655-
const size_t *samplesSizes, unsigned nbSamples,
656-
ZDICT_cover_params_t parameters) {
657-
BYTE *const dict = (BYTE *)dictBuffer;
654+
void *dictBuffer, size_t dictBufferCapacity,
655+
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
656+
ZDICT_cover_params_t parameters)
657+
{
658+
BYTE* const dict = (BYTE*)dictBuffer;
658659
COVER_ctx_t ctx;
659660
COVER_map_t activeDmers;
661+
662+
/* Initialize global data */
663+
g_displayLevel = parameters.zParams.notificationLevel;
660664
/* Checks */
661665
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
662666
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
671675
ZDICT_DICTSIZE_MIN);
672676
return ERROR(dstSize_tooSmall);
673677
}
674-
/* Initialize global data */
675-
g_displayLevel = parameters.zParams.notificationLevel;
676678
/* Initialize context and activeDmers */
677679
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
678680
parameters.d)) {
@@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
947949
unsigned k;
948950
COVER_best_t best;
949951
POOL_ctx *pool = NULL;
952+
950953
/* Checks */
951954
if (kMinK < kMaxD || kMaxK < kMinK) {
952955
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
@@ -1004,7 +1007,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
10041007
data->parameters.k = k;
10051008
data->parameters.d = d;
10061009
data->parameters.steps = kSteps;
1007-
data->parameters.zParams.notificationLevel = g_displayLevel;
1010+
data->parameters.zParams.notificationLevel = displayLevel;
10081011
/* Check the parameters */
10091012
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
10101013
DISPLAYLEVEL(1, "Cover parameters incorrect\n");

lib/dictBuilder/zdict.c

Lines changed: 55 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
207207
U32 cumulLength[LLIMIT] = {0};
208208
U32 savings[LLIMIT] = {0};
209209
const BYTE* b = (const BYTE*)buffer;
210-
size_t length;
211210
size_t maxLength = LLIMIT;
212211
size_t pos = suffix[start];
213212
U32 end = start;
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
222221
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
223222
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
224223
/* skip and mark segment */
225-
U16 u16 = MEM_read16(b+pos+4);
226-
U32 u, e = 6;
227-
while (MEM_read16(b+pos+e) == u16) e+=2 ;
228-
if (b[pos+e] == b[pos+e-1]) e++;
229-
for (u=1; u<e; u++)
224+
U16 const pattern16 = MEM_read16(b+pos+4);
225+
U32 u, patternEnd = 6;
226+
while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
227+
if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
228+
for (u=1; u<patternEnd; u++)
230229
doneMarks[pos+u] = 1;
231230
return solution;
232231
}
233232

234233
/* look forward */
235-
do {
236-
end++;
237-
length = ZDICT_count(b + pos, b + suffix[end]);
238-
} while (length >=MINMATCHLENGTH);
234+
{ size_t length;
235+
do {
236+
end++;
237+
length = ZDICT_count(b + pos, b + suffix[end]);
238+
} while (length >= MINMATCHLENGTH);
239+
}
239240

240241
/* look backward */
241-
do {
242-
length = ZDICT_count(b + pos, b + *(suffix+start-1));
243-
if (length >=MINMATCHLENGTH) start--;
244-
} while(length >= MINMATCHLENGTH);
242+
{ size_t length;
243+
do {
244+
length = ZDICT_count(b + pos, b + *(suffix+start-1));
245+
if (length >=MINMATCHLENGTH) start--;
246+
} while(length >= MINMATCHLENGTH);
247+
}
245248

246249
/* exit if not found a minimum nb of repetitions */
247250
if (end-start < minRatio) {
@@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos(
268271
U32 selectedCount = 0;
269272
U32 selectedID = currentID;
270273
for (id =refinedStart; id < refinedEnd; id++) {
271-
if (b[ suffix[id] + searchLength] != currentChar) {
274+
if (b[suffix[id] + searchLength] != currentChar) {
272275
if (currentCount > selectedCount) {
273276
selectedCount = currentCount;
274277
selectedID = currentID;
@@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos(
297300
memset(lengthList, 0, sizeof(lengthList));
298301

299302
/* look forward */
300-
do {
301-
end++;
302-
length = ZDICT_count(b + pos, b + suffix[end]);
303-
if (length >= LLIMIT) length = LLIMIT-1;
304-
lengthList[length]++;
305-
} while (length >=MINMATCHLENGTH);
303+
{ size_t length;
304+
do {
305+
end++;
306+
length = ZDICT_count(b + pos, b + suffix[end]);
307+
if (length >= LLIMIT) length = LLIMIT-1;
308+
lengthList[length]++;
309+
} while (length >=MINMATCHLENGTH);
310+
}
306311

307312
/* look backward */
308-
length = MINMATCHLENGTH;
309-
while ((length >= MINMATCHLENGTH) & (start > 0)) {
310-
length = ZDICT_count(b + pos, b + suffix[start - 1]);
311-
if (length >= LLIMIT) length = LLIMIT - 1;
312-
lengthList[length]++;
313-
if (length >= MINMATCHLENGTH) start--;
313+
{ size_t length = MINMATCHLENGTH;
314+
while ((length >= MINMATCHLENGTH) & (start > 0)) {
315+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
316+
if (length >= LLIMIT) length = LLIMIT - 1;
317+
lengthList[length]++;
318+
if (length >= MINMATCHLENGTH) start--;
319+
}
314320
}
315321

316322
/* largest useful length */
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
345351
/* mark positions done */
346352
{ U32 id;
347353
for (id=start; id<end; id++) {
348-
U32 p, pEnd;
354+
U32 p, pEnd, length;
349355
U32 const testedPos = suffix[id];
350356
if (testedPos == pos)
351357
length = solution.length;
352358
else {
353-
length = ZDICT_count(b+pos, b+testedPos);
359+
length = (U32)ZDICT_count(b+pos, b+testedPos);
354360
if (length > solution.length) length = solution.length;
355361
}
356362
pEnd = (U32)(testedPos + length);
@@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
575581

576582
typedef struct
577583
{
578-
ZSTD_CCtx* ref;
579-
ZSTD_CCtx* zc;
584+
ZSTD_CCtx* ref; /* contains reference to dictionary */
585+
ZSTD_CCtx* zc; /* working context */
580586
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
581587
} EStats_ress_t;
582588

583589
#define MAXREPOFFSET 1024
584590

585591
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
586-
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
587-
const void* src, size_t srcSize, U32 notificationLevel)
592+
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
593+
const void* src, size_t srcSize,
594+
U32 notificationLevel)
588595
{
589596
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
590597
size_t cSize;
591598

592599
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
593-
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
594-
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
600+
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
601+
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
595602
}
596603
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
597604
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
598605

599606
if (cSize) { /* if == 0; block is not compressible */
600-
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
607+
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
601608

602609
/* literals stats */
603610
{ const BYTE* bytePtr;
@@ -688,6 +695,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
688695
BYTE* dstPtr = (BYTE*)dstBuffer;
689696

690697
/* init */
698+
DEBUGLOG(4, "ZDICT_analyzeEntropy");
691699
esr.ref = ZSTD_createCCtx();
692700
esr.zc = ZSTD_createCCtx();
693701
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
@@ -713,7 +721,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
713721
goto _cleanup;
714722
} }
715723

716-
/* collect stats on all files */
724+
/* collect stats on all samples */
717725
for (u=0; u<nbFiles; u++) {
718726
ZDICT_countEStats(esr, params,
719727
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
@@ -726,7 +734,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
726734
errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
727735
if (HUF_isError(errorCode)) {
728736
eSize = ERROR(GENERIC);
729-
DISPLAYLEVEL(1, "HUF_buildCTable error \n");
737+
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
730738
goto _cleanup;
731739
}
732740
huffLog = (U32)errorCode;
@@ -850,6 +858,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
850858
U32 const notificationLevel = params.notificationLevel;
851859

852860
/* check conditions */
861+
DEBUGLOG(4, "ZDICT_finalizeDictionary");
853862
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
854863
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
855864
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
@@ -1054,18 +1063,22 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
10541063
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
10551064
{
10561065
ZDICT_cover_params_t params;
1066+
DEBUGLOG(3, "ZDICT_trainFromBuffer");
10571067
memset(&params, 0, sizeof(params));
10581068
params.d = 8;
10591069
params.steps = 4;
1060-
/* Default to level 6 since no compression level information is avaialble */
1070+
/* Default to level 6 since no compression level information is available */
10611071
params.zParams.compressionLevel = 6;
1072+
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
1073+
params.zParams.notificationLevel = ZSTD_DEBUG;
1074+
#endif
10621075
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1063-
samplesBuffer, samplesSizes,
1064-
nbSamples, &params);
1076+
samplesBuffer, samplesSizes, nbSamples,
1077+
&params);
10651078
}
10661079

10671080
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
1068-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1081+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
10691082
{
10701083
ZDICT_params_t params;
10711084
memset(&params, 0, sizeof(params));

0 commit comments

Comments
 (0)