From 6f8089482391e999fa772e7b8579ff5f89ec79c1 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Tue, 25 Jun 2019 13:16:25 +0200 Subject: [PATCH] Fix matchfinder limitation --- src/lz4ultra.c | 2 +- src/matchfinder.c | 20 +++++++------------- src/shrink_context.h | 2 ++ 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/lz4ultra.c b/src/lz4ultra.c index a737f02..4a5c52f 100755 --- a/src/lz4ultra.c +++ b/src/lz4ultra.c @@ -50,7 +50,7 @@ #define OPT_INDEP_BLOCKS 8 #define OPT_LEGACY_FRAMES 16 -#define TOOL_VERSION "1.2.1" +#define TOOL_VERSION "1.2.2" /*---------------------------------------------------------------------------*/ diff --git a/src/matchfinder.c b/src/matchfinder.c index 99e24b2..45397de 100644 --- a/src/matchfinder.c +++ b/src/matchfinder.c @@ -197,33 +197,27 @@ static int lz4ultra_find_matches_at(lz4ultra_compressor *pCompressor, const int /* Ascend until we reach a visited interval, the root, or a child of the * root. Link unvisited intervals to the current suffix as we go. */ while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) { - intervals[ref & POS_MASK] = nOffset; + intervals[ref & POS_MASK] = nOffset | VISITED_FLAG; ref = super_ref; } if (super_ref == 0) { /* In this case, the current interval may be any of: * (1) the root; - * (2) an unvisited child of the root; - * (3) an interval last visited by suffix 0 - * - * We could avoid the ambiguity with (3) by using an lcp - * placeholder value other than 0 to represent "visited", but - * it's fastest to use 0. So we just don't allow matches with - * position 0. */ + * (2) an unvisited child of the root */ if (ref != 0) /* Not the root? */ - intervals[ref & POS_MASK] = nOffset; + intervals[ref & POS_MASK] = nOffset | VISITED_FLAG; return 0; } /* Ascend indirectly via pos_data[] links. */ - match_pos = super_ref; + match_pos = super_ref & EXCL_VISITED_MASK; matchptr = pMatches; for (;;) { while ((super_ref = pos_data[match_pos]) > ref) - match_pos = intervals[super_ref & POS_MASK]; - intervals[ref & POS_MASK] = nOffset; + match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK; + intervals[ref & POS_MASK] = nOffset | VISITED_FLAG; pos_data[match_pos] = (unsigned long long)ref; if ((matchptr - pMatches) < nMaxMatches) { @@ -239,7 +233,7 @@ static int lz4ultra_find_matches_at(lz4ultra_compressor *pCompressor, const int if (super_ref == 0) break; ref = super_ref; - match_pos = intervals[ref & POS_MASK]; + match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK; } return (int)(matchptr - pMatches); diff --git a/src/shrink_context.h b/src/shrink_context.h index cbe029f..1c12534 100644 --- a/src/shrink_context.h +++ b/src/shrink_context.h @@ -40,6 +40,8 @@ #define LCP_SHIFT (39-LCP_BITS) #define LCP_MASK (((1ULL<