Skip to content

Commit

Permalink
HPCC-32649 Avoid writing any data to an empty compressed file
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday committed Oct 22, 2024
1 parent 78461cb commit cfba5df
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 15 deletions.
2 changes: 1 addition & 1 deletion system/jlib/jfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ static void initRenameRetrySettings()
}
catch (IException *e) // handle cases where config. not available
{
EXCLOG(e, "doRename");
EXCLOG(e, "initRenameRetrySettings");
e->Release();
renameRetries = 0;
manualRenameChk = false;
Expand Down
62 changes: 48 additions & 14 deletions system/jlib/jlzw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,31 @@ typedef unsigned long bucket_t;
typedef __int64 lbucket_t;


// Cached value of the expert/@disableZeroSizeCompressedFiles configuration option.
static std::atomic<bool> disableZeroSizeCompressedFiles{false};
// Set once the configuration has been consulted (or the attempt to read it has failed).
static std::atomic<bool> initialisedCompressionOptions{false};

// Returns true unless expert/@disableZeroSizeCompressedFiles has been enabled.
// The component configuration is checked first, with the global configuration
// supplying the default. The lookup is only attempted until the first call has
// completed; later calls return the cached setting.
static bool allowZeroSizeCompressedFiles()
{
    if (!initialisedCompressionOptions.load())
    {
        // NB: more than one thread may reach this point concurrently; that is acceptable.
        try
        {
            Owned<IPropertyTree> globalConfig = getGlobalConfigSP();
            Owned<IPropertyTree> config = getComponentConfigSP();
            const char * const optionPath = "expert/@disableZeroSizeCompressedFiles";
            // The global value (defaulting to the current setting) is the fallback for the component lookup.
            const bool globalSetting = globalConfig->getPropBool(optionPath, disableZeroSizeCompressedFiles.load());
            disableZeroSizeCompressedFiles.store(config->getPropBool(optionPath, globalSetting));
        }
        catch (IException *e) // the configuration may not be available
        {
            EXCLOG(e, "allowZeroSizeCompressedFiles");
            e->Release();
        }
        // Mark as initialised even after a failure, so the lookup is not retried.
        initialisedCompressionOptions.store(true);
    }
    const bool disabled = disableZeroSizeCompressedFiles.load();
    return !disabled;
}


//#define STATS
//#define TEST
#ifdef _DEBUG
Expand Down Expand Up @@ -2042,7 +2067,7 @@ class CCompressedFile : implements ICompressedFileIO, public CInterface
Owned<IExpander> expander;
MemoryAttr compressedInputBlock;
unsigned compMethod;
offset_t lastFlushPos = (offset_t)-1;
offset_t lastFlushPos = 0;
offset_t nextExpansionPos = (offset_t)-1;
offset_t startBlockPos = (offset_t)-1;
size32_t fullBlockSize = 0;
Expand Down Expand Up @@ -2466,20 +2491,29 @@ class CCompressedFile : implements ICompressedFileIO, public CInterface
overflow.clear();
throw MakeStringException(-1,"Partial row written at end of file %d of %d",ol,trailer.recordSize);
}
flush();
trailer.datacrc = trailer.crc;
if (setcrc) {
indexbuf.append(sizeof(trailer)-sizeof(trailer.crc),&trailer);
trailer.crc = crc32((const char *)indexbuf.toByteArray(),
indexbuf.length(),trailer.crc);
indexbuf.append(trailer.crc);
}
else {
trailer.datacrc = 0;
trailer.crc = ~0U;
indexbuf.append(sizeof(trailer),&trailer);

//Avoid writing out a header/footer if the file is empty
if ((trailer.expandedSize != 0) || !allowZeroSizeCompressedFiles())
{
//Backward compatibility - force a single index entry, even if the file is empty
if (trailer.expandedSize == 0)
lastFlushPos = (offset_t)-1;
flush();
trailer.datacrc = trailer.crc;
if (setcrc) {
indexbuf.append(sizeof(trailer)-sizeof(trailer.crc),&trailer);
trailer.crc = crc32((const char *)indexbuf.toByteArray(),
indexbuf.length(),trailer.crc);
indexbuf.append(trailer.crc);
}
else {
trailer.datacrc = 0;
trailer.crc = ~0U;
indexbuf.append(sizeof(trailer),&trailer);
}
checkedwrite(trailer.indexPos,indexbuf.length(),indexbuf.toByteArray());
}
checkedwrite(trailer.indexPos,indexbuf.length(),indexbuf.toByteArray());

indexbuf.clear();
if (fileio)
fileio->close();
Expand Down
36 changes: 36 additions & 0 deletions testing/regress/ecl/emptycompressed.ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*##############################################################################
HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
############################################################################## */

import Std.System.Thorlib;
import Std.File AS FileServices;
import Std.Str;
import $.setup;

// Regression test: write a dataset with no rows to a compressed logical file,
// read that file back, and then delete it.

// Logical filename prefix supplied by the regression suite's setup module.
prefix := setup.Files(false, false).QueryFilePrefix;

// Name of the logical file written and read by this test.
filename := prefix+'empty';
// Record layout with a single string field.
r := { string x};

// Inline dataset of layout r, generated with a row count of 0 (i.e. no rows).
ds := DATASET(0, transform(r, SELF := []));

// The file written below, declared as a flat file with layout r.
iclData0 := DATASET(filename, r, flat);

// Perform the actions in order: write the compressed file, output its contents, delete it.
ordered(
output(ds,,filename, overwrite, compressed);
output(iclData0);
// NOTE(review): the second argument is presumably "allowMissing" - confirm against Std.File.
FileServices.DeleteLogicalFile(filename,true),
);
4 changes: 4 additions & 0 deletions testing/regress/ecl/key/emptycompressed.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<Dataset name='Result 1'>
</Dataset>
<Dataset name='Result 2'>
</Dataset>

0 comments on commit cfba5df

Please sign in to comment.